├── .asf.yaml
├── NOTICE
├── .gitignore
├── src
├── main
│ └── java
│ │ └── org
│ │ └── apache
│ │ └── datasketches
│ │ └── vector
│ │ ├── package-info.java
│ │ ├── decomposition
│ │ ├── package-info.java
│ │ ├── SVDAlgo.java
│ │ ├── MatrixOps.java
│ │ ├── MatrixOpsImplOjAlgo.java
│ │ ├── PreambleUtil.java
│ │ └── FrequentDirections.java
│ │ ├── matrix
│ │ ├── package-info.java
│ │ ├── MatrixType.java
│ │ ├── MatrixBuilder.java
│ │ ├── Matrix.java
│ │ ├── MatrixImplOjAlgo.java
│ │ └── MatrixPreambleUtil.java
│ │ └── MatrixFamily.java
└── test
│ └── java
│ └── org
│ └── apache
│ └── datasketches
│ └── vector
│ ├── matrix
│ ├── MatrixBuilderTest.java
│ ├── MatrixTest.java
│ └── MatrixImplOjAlgoTest.java
│ └── decomposition
│ ├── MatrixOpsTest.java
│ └── FrequentDirectionsTest.java
├── README.md
├── tools
├── FindBugsExcludeFilter.xml
└── SketchesCheckstyle.xml
├── LICENSE
└── pom.xml
/.asf.yaml:
--------------------------------------------------------------------------------
1 | github:
2 | homepage: https://datasketches.apache.org
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Apache DataSketches Vector
2 | Copyright 2020 - The Apache Software Foundation
3 |
4 | Copyright 2015-2018 Yahoo
5 | Copyright 2019 Verizon Media
6 |
7 | This product includes software developed at
8 | The Apache Software Foundation (http://www.apache.org/).
9 |
10 | Prior to moving to ASF, the software for this project was developed at
11 | Yahoo (now Verizon Media) (https://developer.yahoo.com).
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Eclipse project files
2 | .classpath
3 | .project
4 | .settings/
5 | .checkstyle
6 |
7 | # IntelliJ project files
8 | *.iml
9 | *.ipr
10 | *.iws
11 |
12 | # Additional tools
13 | .clover/
14 |
15 | # OSX files
16 | .DS_Store
17 |
18 | # Compiler output, class files
19 | *.class
20 | bin/
21 |
22 | # Log file
23 | *.log
24 |
25 | # BlueJ files
26 | *.ctxt
27 |
28 | # Mobile Tools for Java (J2ME)
29 | .mtj.tmp/
30 |
31 | # Package Files #
32 | *.jar
33 | *.war
34 | *.ear
35 | *.zip
36 | *.tar.gz
37 | *.rar
38 |
39 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
40 | hs_err_pid*
41 |
42 | #Test config and output
43 | /target/
44 | test-output/
45 | local/
46 | reports/
47 | .pmd
48 | tmp/
49 |
50 | # Build artifacts
51 | out/
52 | build/
53 | jarsIn/
54 | build.xml
55 | .idea
56 | *.properties
57 | *.releaseBackup
58 | *.next
59 | *.tag
60 |
61 | # Jekyll
62 | Gemfile
63 | Gemfile.lock
64 | _site/
65 | _*
66 | _*/
67 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This is the parent package for all vector sketch algorithms. Any classes at this level are
22 | * used by more than one sub-package.
23 | */
24 | package org.apache.datasketches.vector;
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/decomposition/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * <p>This package is dedicated to streaming algorithms that enable approximate matrix
22 | * decompositions.
23 | *
24 | * <p>These sketches are mergeable and can be serialized and deserialized to/from a compact
25 | * form.
26 | */
27 | package org.apache.datasketches.vector.decomposition;
28 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/matrix/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * <p>This package contains a Matrix class that wraps one of several underlying matrix
22 | * implementations. It can be used to provide a stable public API independent of the
23 | * specific linear algebra package used for computation.
24 | *
25 | * <p>These Matrix objects can be serialized and deserialized to/from a compact form.
26 | */
27 | package org.apache.datasketches.vector.matrix;
28 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/matrix/MatrixType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
/**
 * Identifies the library backing a Matrix implementation.
 *
 * <p>Each constant carries a numeric ID and a human-readable name. Both are fixed at
 * construction time and never change.
 */
public enum MatrixType {
  /**
   * Select the ojAlgo
   */
  OJALGO(1, "ojAlgo");

  // Enum constants are shared singletons, so their state must be immutable.
  private final int id_;
  private final String name_;

  MatrixType(final int id, final String name) {
    id_ = id;
    name_ = name;
  }

  /**
   * Gets the ID
   * @return the ID
   */
  public int getId() {
    return id_;
  }

  /**
   * Gets the name
   * @return the name
   */
  public String getName() {
    return name_;
  }

  /**
   * Returns the same text as {@link #getName()}.
   * @return the name
   */
  @Override
  public String toString() {
    return name_;
  }
}
54 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/vector/matrix/MatrixBuilderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | import static org.testng.Assert.assertEquals;
23 | import static org.testng.Assert.assertNotNull;
24 |
25 | import org.testng.annotations.Test;
26 |
27 | @SuppressWarnings("javadoc")
28 | public class MatrixBuilderTest {
29 | @Test
30 | public void checkBuild() {
31 | final MatrixBuilder builder = new MatrixBuilder();
32 | assertEquals(builder.getBackingType(), MatrixType.OJALGO); // default type
33 |
34 | Matrix m = builder.build(128, 512);
35 | assertNotNull(m);
36 |
37 | m = builder.build(128, 512);
38 | assertNotNull(m);
39 | }
40 |
41 | @Test
42 | public void checkSetType() {
43 | final MatrixBuilder builder = new MatrixBuilder();
44 | final MatrixType type = builder.getBackingType();
45 | assertEquals(type, MatrixType.OJALGO); // default type
46 | assertEquals(type.getId(), MatrixType.OJALGO.getId());
47 | assertEquals(type.getName(), MatrixType.OJALGO.getName());
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/matrix/MatrixBuilder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | /**
23 | * Provides a builder for Matrix objects.
24 | */
25 | public class MatrixBuilder {
26 |
27 | private MatrixType type_ = MatrixType.OJALGO; // default type
28 |
29 | /**
30 | * Default no-op constructor.
31 | */
32 | public MatrixBuilder() {}
33 |
34 | /**
35 | * Sets the underlying type of object to use with any Matrix objects created.
36 | * @param type One of the supported types
37 | * @return This MatrixBuilder object
38 | */
39 | public MatrixBuilder setType(final MatrixType type) {
40 | type_ = type;
41 | return this;
42 | }
43 |
44 | /**
45 | * Returns a value from an enum defining the type of object backing any Matrix objects created.
46 | * @return An item from the MatrixType enum.
47 | */
48 | public MatrixType getBackingType() {
49 | return type_;
50 | }
51 |
52 | /**
53 | * Instantiates a new, empty matrix of the target size
54 | *
55 | * @param numRows Number of rows in matrix
56 | * @param numCols Number of columns in matrix
57 | * @return An empty matrix of the requested size
58 | */
59 | public Matrix build(final int numRows, final int numCols) {
60 | switch (type_) {
61 | case OJALGO:
62 | return MatrixImplOjAlgo.newInstance(numRows, numCols);
63 |
64 | default:
65 | throw new IllegalArgumentException("OJALGO is currently the only supported MatrixTypes");
66 | }
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/decomposition/SVDAlgo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.decomposition;
21 |
/**
 * This class allows a choice of algorithms for Singular Value Decomposition. The options are:
 *
 * <ul>
 * <li>FULL: The matrix library's default SVD implementation.</li>
 * <li>SISVD: Simultaneous iteration, an approximate method likely to be more efficient only with
 * sparse matrices or when k is significantly smaller than the number of rows in the sketch.</li>
 * <li>SYM: Takes advantage of matrix dimensionality, first computing eigenvalues of AA^T, then
 * computes intended results. Squaring A alters condition number and may cause numeric stability
 * issues, but unlikely an issue for Frequent Directions since discarding the smaller singular
 * values/vectors.</li>
 * </ul>
 */
public enum SVDAlgo {

  /**
   * The matrix library's default SVD implementation.
   */
  FULL(1, "Full"),

  /**
   * Simultaneous iteration, an approximate method likely to be more efficient only with sparse
   * matrices or when k is significantly smaller than the number of rows in the sketch.
   */
  SISVD(2, "SISVD"),

  /**
   * Takes advantage of matrix dimensionality, first computing eigenvalues of AA^T, then computes
   * intended results. Squaring A alters condition number and may cause numeric stability issues,
   * but unlikely an issue for Frequent Directions since discarding the smaller singular values/vectors.
   */
  SYM(3, "Symmetrized");

  // Enum constants are shared singletons, so their state must be immutable.
  private final int id_;
  private final String name_;

  SVDAlgo(final int id, final String name) {
    id_ = id;
    name_ = name;
  }

  /**
   * Returns the ID.
   * @return the ID.
   */
  public int getId() {
    return id_;
  }

  /**
   * Gets the name
   * @return the name
   */
  public String getName() {
    return name_;
  }

  /**
   * Returns the same text as {@link #getName()}.
   * @return the name
   */
  @Override
  public String toString() {
    return name_;
  }
}
76 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # DataSketches Java Vector Library (Experimental)
21 | This code is offered "as is" and may not match the quality of the code in, for example, the core datasketches-java repository.
22 |
23 | Please visit the main [DataSketches website](https://datasketches.apache.org) for more information.
24 |
25 | If you are interested in making contributions to this site please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us.
26 |
27 |
28 | ## Build Instructions
29 | __NOTE:__ This component accesses resource files for testing. As a result, the directory elements of the full absolute path of the target installation directory must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements. This is required by the Oracle Java Specification in order to ensure location-independent access to resources: [See Oracle Location-Independent Access to Resources](https://docs.oracle.com/javase/8/docs/technotes/guides/lang/resources.html)
30 |
31 | ### JDK8 is required to compile
32 | This DataSketches component is pure Java and you must compile using JDK 8.
33 |
34 | ### Recommended Build Tool
35 | This DataSketches component is structured as a Maven project and Maven is the recommended Build Tool.
36 |
37 | There are two types of tests: normal unit tests and tests run by the strict profile.
38 |
39 | To run normal unit tests:
40 |
41 | $ mvn clean test
42 |
43 | To run the strict profile tests:
44 |
45 | $ mvn clean test -P strict
46 |
47 | To install jars built from the downloaded source:
48 |
49 | $ mvn clean install -DskipTests=true
50 |
51 | This will create the following jars:
52 |
53 | * datasketches-vector-X.Y.Z.jar The compiled main class files.
54 | * datasketches-vector-X.Y.Z-tests.jar The compiled test class files.
55 | * datasketches-vector-X.Y.Z-sources.jar The main source files.
56 | * datasketches-vector-X.Y.Z-test-sources.jar The test source files
57 | * datasketches-vector-X.Y.Z-javadoc.jar The compressed Javadocs.
58 |
59 | ### Dependencies
60 |
61 | #### Run-time
62 | There are two run-time dependencies:
63 |
64 | * org.ojalgo : ojalgo
65 | * org.apache.datasketches : datasketches-memory
66 |
67 | #### Testing
68 | See the pom.xml file for test dependencies.
69 |
70 |
--------------------------------------------------------------------------------
/tools/FindBugsExcludeFilter.xml:
--------------------------------------------------------------------------------
1 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
64 |
65 |
66 |
71 |
72 |
77 |
83 |
84 |
89 |
90 |
101 |
102 |
110 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/vector/decomposition/MatrixOpsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.decomposition;
21 |
22 | import static org.testng.Assert.assertEquals;
23 | import static org.testng.Assert.fail;
24 |
25 | import org.testng.annotations.Test;
26 |
27 | import org.apache.datasketches.vector.matrix.Matrix;
28 | import org.apache.datasketches.vector.matrix.MatrixBuilder;
29 | import org.apache.datasketches.vector.matrix.MatrixType;
30 |
31 | @SuppressWarnings("javadoc")
32 | public class MatrixOpsTest {
33 |
34 | @Test
35 | public void compareSVDAccuracy() {
36 | final int d = 10;
37 | final int k = 6;
38 | final Matrix input = generateIncreasingEye(d, 2 * k);
39 |
40 | final MatrixOps moFull = MatrixOps.newInstance(input, SVDAlgo.FULL, k);
41 | final MatrixOps moSym = MatrixOps.newInstance(input, SVDAlgo.SYM, k);
42 | final MatrixOps moSISVD = MatrixOps.newInstance(input, SVDAlgo.SISVD, k);
43 | moSISVD.setNumSISVDIter(50 * k); // intentionally run many extra iterations for tighter convegence
44 |
45 | // just singular values first
46 | moFull.svd(input, false);
47 | moSym.svd(input, false);
48 | moSISVD.svd(input, false);
49 | final double[] fullSv = moFull.getSingularValues();
50 | compareSingularValues(fullSv, moSym.getSingularValues(), fullSv.length);
51 | compareSingularValues(fullSv, moSISVD.getSingularValues(), k); // SISVD only produces k values
52 |
53 | // now with vectors
54 | moFull.svd(input, true);
55 | moSym.svd(input, true);
56 | moSISVD.svd(input, true);
57 | // TODO: better comparison is vector-wise, testing that sign changes are consistent but that
58 | // requires non-zero elements
59 | final Matrix fullVt = moFull.getVt();
60 | compareMatrixElementMagnitudes(fullVt, moSym.getVt(), (int) fullVt.getNumRows());
61 | compareMatrixElementMagnitudes(fullVt, moSISVD.getVt(), k); // SISVD only produces k vectors
62 |
63 | // just to be sure
64 | compareMatrixElementMagnitudes(fullVt, moFull.getVt(input), (int) fullVt.getNumRows());
65 | }
66 |
67 | @Test
68 | public void checkInvalidMatrixSize() {
69 | final int d = 10;
70 | final int k = 6;
71 | final Matrix A = generateIncreasingEye(d, 2 * k);
72 | final MatrixOps mo = MatrixOps.newInstance(A, SVDAlgo.FULL, k);
73 |
74 | Matrix B = generateIncreasingEye(d, (2 * k) + 1);
75 | try {
76 | mo.svd(B, true);
77 | fail();
78 | } catch (final IllegalArgumentException e) {
79 | // expected
80 | }
81 |
82 | B = generateIncreasingEye(d - 1, 2 * k);
83 | try {
84 | mo.svd(B, false);
85 | fail();
86 | } catch (final IllegalArgumentException e) {
87 | // expected
88 | }
89 |
90 | }
91 |
92 | private static void compareSingularValues(final double[] A, final double[] B, final int n) {
93 | assertEquals(A.length, B.length);
94 |
95 | for (int i = 0; i < n; ++i) {
96 | assertEquals(A[i], B[i], 1e-6);
97 | }
98 | }
99 |
100 |
101 | private static void compareMatrixElementMagnitudes(final Matrix A, final Matrix B, final int n) {
102 | assertEquals(A.getNumColumns(), B.getNumColumns());
103 | assertEquals(A.getNumRows(), B.getNumRows());
104 |
105 | for (int i = 0; i < n; ++i) {
106 | for (int j = 0; j < A.getNumColumns(); ++j) {
107 | assertEquals(Math.abs(A.getElement(i, j)), Math.abs(B.getElement(i, j)), 1e-6);
108 | }
109 | }
110 | }
111 |
112 | /**
113 | * Creates a scaled I matrix, where the diagonal consists of increasing integers,
114 | * starting with 1.0.
115 | * @param nRows number of rows
116 | * @param nCols number of columns
117 | * @return PrimitiveDenseStore, suitable for direct use or wrapping
118 | */
119 | private static Matrix generateIncreasingEye(final int nRows, final int nCols) {
120 | final Matrix m = new MatrixBuilder().setType(MatrixType.OJALGO).build(nRows, nCols);
121 | for (int i = 0; (i < nRows) && (i < nCols); ++i) {
122 | m.setElement(i, i, 1.0 + i);
123 | }
124 | return m;
125 | }
126 |
127 | }
128 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/decomposition/MatrixOps.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package org.apache.datasketches.vector.decomposition;
20 |
21 | import org.apache.datasketches.vector.matrix.Matrix;
22 |
23 | /**
24 | * Abstract class to wrap libraries to compute singular value
25 | * decompositions and related Matrix operations needed by Frequent
26 | * Directions. May return as many singular values as exist, but other
27 | * operations will limit output to k dimensions.
28 | */
29 | abstract class MatrixOps {
30 |
31 | // iterations for SISVD
32 | private static final int DEFAULT_NUM_ITER = 8;
33 |
34 | /**
35 | * Matrix dimensions
36 | */
37 | final int n_; // rows
38 | final int d_; // columns
39 |
40 | /**
41 | * Target number of dimensions
42 | */
43 | final int k_;
44 |
45 | /**
46 | * Singular value decomposition method to use
47 | */
48 | final SVDAlgo algo_;
49 |
50 | int numSISVDIter_;
51 |
52 | /**
53 | * Creates an empty MatrixOps object to support Frequent Directions matrix operations
54 | *
55 | * @param A Matrix of the required type and correct dimensions
56 | * @param algo Enum indicating method to use for SVD
57 | * @param k Target number of dimensions for results
58 | * @return an empty MatrixOps object
59 | */
60 | public static MatrixOps newInstance(final Matrix A, final SVDAlgo algo, final int k) {
61 | final int n = (int) A.getNumRows();
62 | final int d = (int) A.getNumColumns();
63 |
64 | final MatrixOps mo;
65 |
66 | switch (A.getMatrixType()) {
67 | case OJALGO:
68 | mo = new MatrixOpsImplOjAlgo(n, d, algo, k);
69 | break;
70 |
71 | default:
72 | throw new IllegalArgumentException("Unknown MatrixType: " + A.getMatrixType().toString());
73 | }
74 |
75 | if (algo == SVDAlgo.SISVD) {
76 | mo.setNumSISVDIter((int) Math.ceil(Math.log(d)));
77 | }
78 | return mo;
79 | }
80 |
81 | MatrixOps(final int n, final int d, final SVDAlgo algo, final int k) {
82 | // TODO: make these actual checks
83 | assert n > 0;
84 | assert d > 0;
85 | assert n < d;
86 | assert k > 0;
87 | assert k < n;
88 |
89 | n_ = n;
90 | d_ = d;
91 | algo_ = algo;
92 | k_ = k;
93 |
94 | numSISVDIter_ = DEFAULT_NUM_ITER;
95 | }
96 |
97 | /**
98 | * Computes and returns the singular values, in descending order. May modify the internal state
99 | * of this object.
100 | * @param A Matrix to decompose
101 | * @return Array of singular values
102 | */
103 | public double[] getSingularValues(final Matrix A) {
104 | svd(A, false);
105 | return getSingularValues();
106 | }
107 |
108 | /**
109 | * Returns pre-computed singular values (stored in descending order). Does not perform new computation.
110 | * @return Singular values from the last computation
111 | */
112 | abstract double[] getSingularValues();
113 |
114 | /**
115 | * Computes and returns the right singular vectors of A. May modify the internal state of this object.
116 | * @param A Matrix to decompose
117 | * @return Matrix of size d x k
118 | */
119 | public Matrix getVt(final Matrix A) {
120 | svd(A, true);
121 | return getVt();
122 | }
123 |
124 | /**
125 | * Returns pre-computed right singular vectors (row-wise?). Does not perform new computation.
126 | *
127 | * @return Matrix of size d x k
128 | */
129 | abstract Matrix getVt();
130 |
131 | /**
132 | * Performs a Frequent Directions rank reduction with the SVDAlgo used when obtaining the instance.
133 | * Modifies internal state, with results queried via getVt() and getSingularValues().
134 | * @return The amount of weight subtracted from the singular values
135 | */
136 | abstract double reduceRank(final Matrix A);
137 |
138 | /**
139 | * Returns Matrix object reconstructed using the provided singular value adjustment. Requires first
140 | * decomposing the matrix.
141 | * @param A Matrix to decompose and adjust
142 | * @param adjustment Amount by which to adjust the singular values
143 | * @return A new Matrix based on A with singular values adjusted by adjustment
144 | */
145 | abstract Matrix applyAdjustment(final Matrix A, final double adjustment);
146 |
147 | /**
148 | * Computes a singular value decomposition of the provided Matrix.
149 | *
150 | * @param A Matrix to decompose. Size must conform, and it may be overwritten on return. Pass a copy to
151 | * avoid this.
152 | * @param computeVectors True to compute Vt, false if only need singular values/
153 | */
154 | abstract void svd(final Matrix A, final boolean computeVectors);
155 |
156 | void setNumSISVDIter(final int numSISVDIter) {
157 | numSISVDIter_ = numSISVDIter;
158 | }
159 | }
160 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/MatrixFamily.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector;
21 |
22 | import java.util.HashMap;
23 | import java.util.Map;
24 |
25 | /**
26 | * Defines the various families of sketch and set operation classes. A family defines a set of
27 | * classes that share fundamental algorithms and behaviors. The classes within a family may
28 | * still differ by how they are stored and accessed. For example, internally there may be separate
29 | * classes for algorithms that operate on the Java heap and off-heap.
30 | * Not all of these families have parallel forms on and off-heap but are included for completeness.
31 | *
32 | *
Family IDs start at 128 to allow separation from sketches-core for as long as possible without
33 | * inducing a mutual dependency between packages.
34 | *
35 | * @author Lee Rhodes
36 | * @author Jon Malkin
37 | */
public enum MatrixFamily {
  /**
   * Family of stand-alone Matrix objects; the serialized preamble uses between 2 and 3 longs.
   */
  MATRIX(128, "Matrix", 2, 3),

  /**
   * The Frequent Directions sketch is used for approximate Singular Value Decomposition (SVD)
   * of a matrix; the serialized preamble uses between 2 and 4 longs.
   */
  FREQUENTDIRECTIONS(129, "FrequentDirections", 2, 4);

  // Reverse-lookup tables, populated once in the static initializer below.
  private static final Map<Integer, MatrixFamily> lookupID = new HashMap<>();
  private static final Map<String, MatrixFamily> lookupFamName = new HashMap<>();

  private final int id_;
  private final String famName_; // always stored in upper case
  private final int minPreLongs_;
  private final int maxPreLongs_;

  static {
    for (final MatrixFamily f : values()) {
      lookupID.put(f.getID(), f);
      lookupFamName.put(f.getFamilyName(), f); // name is already upper case
    }
  }

  MatrixFamily(final int id, final String famName, final int minPreLongs, final int maxPreLongs) {
    id_ = id;
    famName_ = upperCase(famName);
    minPreLongs_ = minPreLongs;
    maxPreLongs_ = maxPreLongs;
  }

  /**
   * Upper-cases a name independently of the JVM's default locale, so that family names and
   * lookups do not change under locales with special casing rules (e.g. Turkish dotted I).
   * @param name the name to convert
   * @return the locale-neutral upper-case form of the name
   */
  private static String upperCase(final String name) {
    return name.toUpperCase(java.util.Locale.ROOT);
  }

  /**
   * Returns the byte ID for this family
   * @return the byte ID for this family
   */
  public int getID() {
    return id_;
  }

  /**
   * Verifies that the given ID is the ID of this family.
   * @param id the given id, a value &ge; 128.
   * @throws IllegalArgumentException if the given id differs from this family's ID, or is not
   * the ID of any known family
   */
  public void checkFamilyID(final int id) {
    if (id != id_) {
      // idToFamily() itself throws if the id does not belong to any family
      throw new IllegalArgumentException(
          "Possible Corruption: This Family " + toString()
              + " does not match the ID of the given Family: " + idToFamily(id).toString());
    }
  }

  /**
   * Returns the name for this family
   * @return the name for this family, in upper case
   */
  public String getFamilyName() {
    return famName_;
  }

  /**
   * Returns the minimum preamble size for this family in longs
   * @return the minimum preamble size for this family in longs
   */
  public int getMinPreLongs() {
    return minPreLongs_;
  }

  /**
   * Returns the maximum preamble size for this family in longs
   * @return the maximum preamble size for this family in longs
   */
  public int getMaxPreLongs() {
    return maxPreLongs_;
  }

  @Override
  public String toString() {
    return famName_;
  }

  /**
   * Returns the Family given the ID
   * @param id the given ID
   * @return the Family given the ID
   * @throws IllegalArgumentException if no family has the given ID
   */
  public static MatrixFamily idToFamily(final int id) {
    final MatrixFamily f = lookupID.get(id);
    if (f == null) {
      throw new IllegalArgumentException("Possible Corruption: Illegal Family ID: " + id);
    }
    return f;
  }

  /**
   * Returns the Family given the family name; the match is case-insensitive.
   * @param famName the family name
   * @return the Family given the family name
   * @throws IllegalArgumentException if no family has the given name
   */
  public static MatrixFamily stringToFamily(final String famName) {
    final MatrixFamily f = lookupFamName.get(upperCase(famName));
    if (f == null) {
      throw new IllegalArgumentException("Possible Corruption: Illegal Family Name: " + famName);
    }
    return f;
  }

  /**
   * Returns the Family given one of the recognized class objects on one of the Families.
   * An object is recognized when the simple name of its class contains a family name,
   * ignoring case; families are tried in declaration order.
   * @param obj a recognized Family class object
   * @return the Family given one of the recognized class objects on one of the Families
   * @throws IllegalArgumentException if the class name matches no family
   */
  public static MatrixFamily objectToFamily(final Object obj) {
    final String sname = upperCase(obj.getClass().getSimpleName());
    for (final MatrixFamily f : values()) {
      if (sname.contains(f.toString())) {
        return f;
      }
    }
    throw new IllegalArgumentException("Possible Corruption: Unknown object");
  }
}
161 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/vector/matrix/MatrixTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | import static org.testng.Assert.assertEquals;
23 | import static org.testng.Assert.assertNotNull;
24 | import static org.testng.Assert.assertNull;
25 | import static org.testng.Assert.assertTrue;
26 | import static org.testng.Assert.fail;
27 |
28 | import org.testng.annotations.Test;
29 |
30 | import org.apache.datasketches.memory.Memory;
31 | import org.apache.datasketches.memory.WritableMemory;
32 | import org.apache.datasketches.vector.MatrixFamily;
33 |
34 | @SuppressWarnings("javadoc")
35 | public class MatrixTest {
36 |
37 | @Test
38 | public void checkHeapify() {
39 | final Matrix m = Matrix.builder().setType(MatrixType.OJALGO).build(3, 3);
40 | final byte[] bytes = m.toByteArray();
41 | final Memory mem = Memory.wrap(bytes);
42 | println(MatrixPreambleUtil.preambleToString(mem));
43 |
44 | final Matrix tgt = Matrix.heapify(mem, MatrixType.OJALGO);
45 | assertTrue(tgt instanceof MatrixImplOjAlgo);
46 | checkMatrixEquality(m, tgt);
47 | }
48 |
49 | @Test
50 | public void checkWrap() {
51 | assertNull(Matrix.wrap(null));
52 |
53 | final Matrix src = Matrix.builder().setType(MatrixType.OJALGO).build(3, 3);
54 | final Object obj = src.getRawObject();
55 | final Matrix tgt = Matrix.wrap(obj);
56 | assertTrue(tgt instanceof MatrixImplOjAlgo);
57 | checkMatrixEquality(src, tgt);
58 |
59 | try {
60 | final Object notAMatrix = 1.0;
61 | Matrix.wrap(notAMatrix);
62 | fail();
63 | } catch (final IllegalArgumentException e) {
64 | // expected
65 | }
66 |
67 | assertNotNull(src.toString());
68 | }
69 |
70 | @Test
71 | public void checkSize() {
72 | final int nRow = 7;
73 | final int nCol = 3;
74 | final Matrix m = Matrix.builder().build(nRow, nCol);
75 |
76 | int expectedSize = (MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES)
77 | + (nRow * nCol * Double.BYTES);
78 | assertEquals(m.getSizeBytes(), expectedSize);
79 |
80 | // this should redirect call to getSizeBytes()
81 | assertEquals(m.getCompactSizeBytes(nRow, nCol), expectedSize);
82 |
83 | // degenerate cases
84 | expectedSize = (MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES);
85 | assertEquals(m.getCompactSizeBytes(0, nCol), expectedSize);
86 | assertEquals(m.getCompactSizeBytes(nRow, 0), expectedSize);
87 |
88 | // matrix subsets
89 | expectedSize = (MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES)
90 | + ((5 * 3) * Double.BYTES);
91 | assertEquals(m.getCompactSizeBytes(5, 3), expectedSize);
92 |
93 | expectedSize = (MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES)
94 | + ((7 * 2) * Double.BYTES);
95 | assertEquals(m.getCompactSizeBytes(7, 2), expectedSize);
96 |
97 | expectedSize = (MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES)
98 | + ((2 * 2) * Double.BYTES);
99 | assertEquals(m.getCompactSizeBytes(2, 2), expectedSize);
100 | }
101 |
102 | @Test
103 | public void invalidSerVer() {
104 | final int nRows = 50;
105 | final int nCols = 75;
106 | final MatrixBuilder mb = new MatrixBuilder();
107 | final Matrix m = mb.build(nRows, nCols);
108 | final byte[] sketchBytes = m.toByteArray();
109 | final WritableMemory mem = WritableMemory.wrap(sketchBytes);
110 | MatrixPreambleUtil.insertSerVer(mem.getArray(), mem.getCumulativeOffset(0L), 0);
111 |
112 | try {
113 | MatrixPreambleUtil.preambleToString(mem);
114 | fail();
115 | } catch (final IllegalArgumentException e) {
116 | // expected
117 | }
118 | }
119 |
120 | @Test
121 | public void invalidFamily() {
122 | final int nRows = 3;
123 | final int nCols = 3;
124 | final MatrixBuilder mb = new MatrixBuilder();
125 | final Matrix m = mb.build(nRows, nCols);
126 | final byte[] sketchBytes = m.toByteArray();
127 | final WritableMemory mem = WritableMemory.wrap(sketchBytes);
128 | MatrixPreambleUtil.insertFamilyID(mem.getArray(), mem.getCumulativeOffset(0L), 0);
129 |
130 | try {
131 | MatrixPreambleUtil.preambleToString(mem);
132 | fail();
133 | } catch (final IllegalArgumentException e) {
134 | // expected
135 | }
136 | }
137 |
138 | @Test
139 | public void checkInsufficientMemoryCapacity() {
140 | final byte[] bytes = new byte[6];
141 | final Memory mem = Memory.wrap(bytes);
142 | try {
143 | MatrixPreambleUtil.preambleToString(mem);
144 | fail();
145 | } catch (final IllegalArgumentException e) {
146 | // expected
147 | }
148 | }
149 |
150 | @Test
151 | public void checkCompactPreamble() {
152 | final int nRows = 4;
153 | final int nCols = 7;
154 | final MatrixBuilder mb = new MatrixBuilder();
155 | final Matrix m = mb.build(nRows, nCols);
156 |
157 | final Memory mem = Memory.wrap(m.toCompactByteArray(nRows - 1, 7));
158 | MatrixPreambleUtil.preambleToString(mem);
159 | }
160 |
161 | static void checkMatrixEquality(final Matrix m1, final Matrix m2) {
162 | assertEquals(m1.numRows_, m2.numRows_);
163 | assertEquals(m1.numCols_, m2.numCols_);
164 |
165 | for (int i = 0; i < m1.numRows_; ++i) {
166 | for (int j = 0; j < m1.numCols_; ++j) {
167 | assertEquals(m1.getElement(i, j), m2.getElement(i, j),
168 | "Mismatch at (" + i + ", " + j + ")");
169 | }
170 | }
171 | }
172 |
173 | /**
174 | * println the message
175 | * @param msg the message
176 | */
177 | static void println(final String msg) {
178 | //System.out.println(msg);
179 | }
180 | }
181 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/vector/matrix/MatrixImplOjAlgoTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | import static org.testng.Assert.assertEquals;
23 | import static org.testng.Assert.fail;
24 |
25 | import org.ojalgo.matrix.store.Primitive64Store;
26 | import org.testng.annotations.Test;
27 |
28 | import org.apache.datasketches.memory.Memory;
29 | import org.apache.datasketches.memory.WritableMemory;
30 |
31 | @SuppressWarnings("javadoc")
32 | public class MatrixImplOjAlgoTest {
33 |
34 | @Test
35 | public void checkInstantiation() {
36 | final int nRows = 10;
37 | final int nCols = 15;
38 | final Matrix m = MatrixImplOjAlgo.newInstance(nRows, nCols);
39 | assertEquals(m.getNumRows(), nRows);
40 | assertEquals(m.getNumColumns(), nCols);
41 |
42 | final Primitive64Store pds = (Primitive64Store) m.getRawObject();
43 | assertEquals(pds.countRows(), nRows);
44 | assertEquals(pds.countColumns(), nCols);
45 |
46 | final Matrix wrapped = Matrix.wrap(pds);
47 | MatrixTest.checkMatrixEquality(wrapped, m);
48 | assertEquals(wrapped.getRawObject(), pds);
49 | }
50 |
51 | @Test
52 | public void updateAndQueryValues() {
53 | final int nRows = 5;
54 | final int nCols = 5;
55 | final Matrix m = generateIncreasingEye(nRows, nCols); // tests setElement() in method
56 |
57 | for (int i = 0; i < nRows; ++i) {
58 | for (int j = 0; j < nCols; ++j) {
59 | final double val = m.getElement(i, j);
60 | if (i == j) {
61 | assertEquals(val, i + 1.0);
62 | } else {
63 | assertEquals(val, 0.0);
64 | }
65 | }
66 | }
67 | }
68 |
69 | @Test
70 | public void checkStandardSerialization() {
71 | final int nRows = 3;
72 | final int nCols = 7;
73 | final Matrix m = generateIncreasingEye(nRows, nCols);
74 |
75 | final byte[] mtxBytes = m.toByteArray();
76 | assertEquals(mtxBytes.length, m.getSizeBytes());
77 |
78 | final Memory mem = Memory.wrap(mtxBytes);
79 | final Matrix tgt = MatrixImplOjAlgo.heapifyInstance(mem);
80 | MatrixTest.checkMatrixEquality(tgt, m);
81 | }
82 |
83 | @Test
84 | public void checkCompactSerialization() {
85 | final int nRows = 4;
86 | final int nCols = 7;
87 | final Matrix m = generateIncreasingEye(nRows, nCols);
88 |
89 | byte[] mtxBytes = m.toCompactByteArray(nRows - 1, 7);
90 | assertEquals(mtxBytes.length, m.getCompactSizeBytes(nRows - 1, 7));
91 |
92 | Memory mem = Memory.wrap(mtxBytes);
93 | Matrix tgt = MatrixImplOjAlgo.heapifyInstance(mem);
94 | for (int c = 0; c < nCols; ++c) {
95 | for (int r = 0; r < (nRows - 1); ++r) {
96 | assertEquals(tgt.getElement(r, c), m.getElement(r, c)); // equal here
97 | }
98 | // assuming nRows - 1 so check only the last row as being 0
99 | assertEquals(tgt.getElement(nRows - 1, c), 0.0);
100 | }
101 |
102 | // test without compacting
103 | mtxBytes = m.toCompactByteArray(nRows, nCols);
104 | assertEquals(mtxBytes.length, m.getSizeBytes());
105 | mem = Memory.wrap(mtxBytes);
106 | tgt = MatrixImplOjAlgo.heapifyInstance(mem);
107 | MatrixTest.checkMatrixEquality(tgt, m);
108 | }
109 |
110 | @Test
111 | public void matrixRowOperations() {
112 | final int nRows = 7;
113 | final int nCols = 5;
114 | final Matrix m = generateIncreasingEye(nRows, nCols);
115 |
116 | final int tgtCol = 2;
117 | final double[] v = m.getRow(tgtCol); // diagonal matrix, so this works ok
118 | for (int i = 0; i < v.length; ++i) {
119 | assertEquals(v[i], (i == tgtCol ? i + 1.0 : 0.0));
120 | }
121 |
122 | assertEquals(m.getElement(6, tgtCol), 0.0);
123 | m.setRow(6, v);
124 | assertEquals(m.getElement(6, tgtCol), tgtCol + 1.0);
125 | }
126 |
127 | @Test
128 | public void matrixColumnOperations() {
129 | final int nRows = 9;
130 | final int nCols = 4;
131 | final Matrix m = generateIncreasingEye(nRows, nCols);
132 |
133 | final int tgtRow = 3;
134 | final double[] v = m.getColumn(tgtRow); // diagonal matrix, so this works ok
135 | for (int i = 0; i < v.length; ++i) {
136 | assertEquals(v[i], (i == tgtRow ? i + 1.0 : 0.0));
137 | }
138 |
139 | assertEquals(m.getElement(tgtRow, 0), 0.0);
140 | m.setColumn(0, v);
141 | assertEquals(m.getElement(tgtRow, 0), tgtRow + 1.0);
142 | }
143 |
144 | @Test
145 | public void invalidRowColumnOperations() {
146 | final int nRows = 9;
147 | final int nCols = 4;
148 | final Matrix m = generateIncreasingEye(nRows, nCols);
149 |
150 | final double[] shortRow = new double[nCols - 2];
151 | try {
152 | m.setRow(1, shortRow);
153 | fail();
154 | } catch (final IllegalArgumentException e) {
155 | // expected
156 | }
157 |
158 | final double[] longColumn = new double[nRows + 2];
159 | try {
160 | m.setColumn(1, longColumn);
161 | fail();
162 | } catch (final IllegalArgumentException e) {
163 | // expected
164 | }
165 | }
166 |
167 | @Test
168 | public void invalidSerVer() {
169 | final int nRows = 3;
170 | final int nCols = 3;
171 | final Matrix m = generateIncreasingEye(nRows, nCols);
172 | final byte[] sketchBytes = m.toByteArray();
173 | final WritableMemory mem = WritableMemory.wrap(sketchBytes);
174 | MatrixPreambleUtil.insertSerVer(mem.getArray(), mem.getCumulativeOffset(0L), 0);
175 |
176 | try {
177 | MatrixImplOjAlgo.heapifyInstance(mem);
178 | fail();
179 | } catch (final IllegalArgumentException e) {
180 | // expected
181 | }
182 | }
183 |
184 | @Test
185 | public void invalidFamily() {
186 | final int nRows = 3;
187 | final int nCols = 3;
188 | final Matrix m = generateIncreasingEye(nRows, nCols);
189 | final byte[] sketchBytes = m.toByteArray();
190 | final WritableMemory mem = WritableMemory.wrap(sketchBytes);
191 | MatrixPreambleUtil.insertFamilyID(mem.getArray(), mem.getCumulativeOffset(0L), 0);
192 |
193 | try {
194 | MatrixImplOjAlgo.heapifyInstance(mem);
195 | fail();
196 | } catch (final IllegalArgumentException e) {
197 | // expected
198 | }
199 | }
200 |
201 | @Test
202 | public void insufficientMemoryCapacity() {
203 | final byte[] bytes = new byte[6];
204 | final Memory mem = Memory.wrap(bytes);
205 | try {
206 | MatrixImplOjAlgo.heapifyInstance(mem);
207 | fail();
208 | } catch (final IllegalArgumentException e) {
209 | // expected
210 | }
211 | }
212 |
213 | /**
214 | * Creates a scaled I matrix, where the diagonal consists of increasing integers,
215 | * starting with 1.0.
216 | * @param nRows number of rows
217 | * @param nCols number of columns
218 | * @return Primitive64Store, suitable for direct use or wrapping
219 | */
220 | private static Matrix generateIncreasingEye(final int nRows, final int nCols) {
221 | final Matrix m = MatrixImplOjAlgo.newInstance(nRows, nCols);
222 | for (int i = 0; (i < nRows) && (i < nCols); ++i) {
223 | m.setElement(i, i, 1.0 + i);
224 | }
225 | return m;
226 | }
227 | }
228 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/decomposition/MatrixOpsImplOjAlgo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.decomposition;
21 |
22 | import java.util.Optional;
23 |
24 | import org.ojalgo.matrix.decomposition.Eigenvalue;
25 | import org.ojalgo.matrix.decomposition.QR;
26 | import org.ojalgo.matrix.decomposition.SingularValue;
27 | import org.ojalgo.matrix.store.MatrixStore;
28 | import org.ojalgo.matrix.store.Primitive64Store;
29 | import org.ojalgo.matrix.store.SparseStore;
30 | import org.ojalgo.random.Normal;
31 |
32 | import org.apache.datasketches.vector.matrix.Matrix;
33 | import org.apache.datasketches.vector.matrix.MatrixType;
34 |
35 | class MatrixOpsImplOjAlgo extends MatrixOps {
36 | private double[] sv_;
37 | private Primitive64Store Vt_;
38 |
39 | // work objects for SISVD
40 | private Primitive64Store block_;
41 | private Primitive64Store T_; // also used in SymmetricEVD
42 | private QR qr_;
43 |
44 | // work objects for Symmetric EVD
45 | private Eigenvalue evd_;
46 |
47 | // work object for full SVD
48 | private SingularValue svd_;
49 |
50 | transient private SparseStore S_; // to hold singular value matrix
51 |
52 | MatrixOpsImplOjAlgo(final int n, final int d, final SVDAlgo algo, final int k) {
53 | super(n, d, algo, k);
54 |
55 | // Allocate space for the decomposition
56 | sv_ = new double[Math.min(n_, d_)];
57 | Vt_ = null; // lazy allocation
58 | }
59 |
60 | @Override
61 | void svd(final Matrix A, final boolean computeVectors) {
62 | assert A.getMatrixType() == MatrixType.OJALGO;
63 |
64 | if (A.getNumRows() != n_) {
65 | throw new IllegalArgumentException("A.numRows() != n_");
66 | } else if (A.getNumColumns() != d_) {
67 | throw new IllegalArgumentException("A.numColumns() != d_");
68 | }
69 |
70 | if (computeVectors && (Vt_ == null)) {
71 | Vt_ = Primitive64Store.FACTORY.make(n_, d_);
72 | S_ = SparseStore.makePrimitive(sv_.length, sv_.length);
73 | }
74 |
75 | switch (algo_) {
76 | case FULL:
77 | computeFullSVD((Primitive64Store) A.getRawObject(), computeVectors);
78 | return;
79 |
80 | case SISVD:
81 | computeSISVD((Primitive64Store) A.getRawObject(), computeVectors);
82 | return;
83 |
84 | case SYM:
85 | computeSymmEigSVD((Primitive64Store) A.getRawObject(), computeVectors);
86 | return;
87 |
88 | default:
89 | throw new RuntimeException("SVDAlgo type not (yet?) supported: " + algo_.toString());
90 | }
91 | }
92 |
93 | @Override
94 | double[] getSingularValues() {
95 | return sv_;
96 | }
97 |
98 | @Override
99 | Matrix getVt() {
100 | return Matrix.wrap(Vt_);
101 | }
102 |
103 | @Override
104 | double reduceRank(final Matrix A) {
105 | svd(A, true);
106 |
107 | double svAdjustment = 0.0;
108 |
109 | if (sv_.length >= k_) {
110 | double medianSVSq = sv_[k_ - 1]; // (l_/2)th item, not yet squared
111 | medianSVSq *= medianSVSq;
112 | svAdjustment += medianSVSq; // always track, even if not using compensative mode
113 | for (int i = 0; i < (k_ - 1); ++i) {
114 | final double val = sv_[i];
115 | final double adjSqSV = (val * val) - medianSVSq;
116 | S_.set(i, i, adjSqSV < 0 ? 0.0 : Math.sqrt(adjSqSV)); // just to be safe
117 | }
118 | for (int i = k_ - 1; i < S_.countColumns(); ++i) {
119 | S_.set(i, i, 0.0);
120 | }
121 | } else {
122 | throw new RuntimeException("Running with d < 2k not (yet?) supported");
123 | /*
124 | for (int i = 0; i < sv_.length; ++i) {
125 | S_.set(i, i, sv_[i]);
126 | }
127 | for (int i = sv_.length; i < S_.countColumns(); ++i) {
128 | S_.set(i, i, 0.0);
129 | }
130 | */
131 | }
132 |
133 | // store the result back in A
134 | S_.multiply(Vt_, (Primitive64Store) A.getRawObject());
135 |
136 | return svAdjustment;
137 | }
138 |
139 | @Override
140 | Matrix applyAdjustment(final Matrix A, final double svAdjustment) {
141 | // copy A before decomposing
142 | final Primitive64Store result
143 | = Primitive64Store.FACTORY.copy((Primitive64Store) A.getRawObject());
144 | svd(Matrix.wrap(result), true);
145 |
146 | for (int i = 0; i < (k_ - 1); ++i) {
147 | final double val = sv_[i];
148 | final double adjSV = Math.sqrt((val * val) + svAdjustment);
149 | S_.set(i, i, adjSV);
150 | }
151 | for (int i = k_ - 1; i < S_.countColumns(); ++i) {
152 | S_.set(i, i, 0.0);
153 | }
154 |
155 | S_.multiply(Vt_, result);
156 |
157 | return Matrix.wrap(result);
158 | }
159 |
160 | private void computeFullSVD(final MatrixStore A, final boolean computeVectors) {
161 | if (svd_ == null) {
162 | svd_ = SingularValue.PRIMITIVE.make(A);
163 | }
164 |
165 | if (computeVectors) {
166 | svd_.decompose(A);
167 | svd_.getV().transpose().supplyTo(Vt_);
168 | } else {
169 | svd_.computeValuesOnly(A);
170 | }
171 | svd_.getSingularValues(sv_);
172 | }
173 |
174 | private void computeSISVD(final MatrixStore A, final boolean computeVectors) {
175 | // want to iterate on smaller dimension of A (n x d)
176 | // currently, error in constructor if d < n, so n is always the smaller dimension
177 | if (block_ == null) {
178 | block_ = Primitive64Store.FACTORY.makeFilled(d_, k_, new Normal(0.0, 1.0));
179 | qr_ = QR.PRIMITIVE.make(block_);
180 | T_ = Primitive64Store.FACTORY.make(n_, k_);
181 | } else {
182 | block_.fillAll(new Normal(0.0, 1.0));
183 | }
184 |
185 | // orthogonalize for numeric stability
186 | qr_.decompose(block_);
187 | qr_.getQ().supplyTo(block_);
188 |
189 | for (int i = 0; i < numSISVDIter_; ++i) {
190 | A.multiply(block_, T_);
191 |
192 | // again, just for stability
193 | qr_.decompose(T_.premultiply(A.transpose()));
194 | qr_.getQ().supplyTo(block_);
195 | }
196 |
197 | // Rayleigh-Ritz postprocessing
198 |
199 | final SingularValue svd = SingularValue.PRIMITIVE.make(T_);
200 | svd.compute(block_.premultiply(A));
201 |
202 | svd.getSingularValues(sv_);
203 |
204 | if (computeVectors) {
205 | // V = block * Q2^T so V^T = Q2 * block^T
206 | // and ojAlgo figures out that it only needs to fill the first k_ rows of Vt_
207 | svd.getV().multiply(block_.transpose()).supplyTo(Vt_);
208 | }
209 | }
210 |
211 | private void computeSymmEigSVD(final MatrixStore A, final boolean computeVectors) {
212 | if (evd_ == null) {
213 | evd_ = Eigenvalue.PRIMITIVE.make(n_, true);
214 | }
215 |
216 | // want left singular vectors U, aka eigenvectors of AA^T -- so compute that
217 | evd_.decompose(A.transpose().premultiply(A));
218 |
219 | // TODO: can we only use k_ values?
220 | final double[] ev = new double[n_];
221 | evd_.getEigenvalues(ev, Optional.empty());
222 | for (int i = 0; i < ev.length; ++i) {
223 | final double val = Math.sqrt(ev[i]);
224 | sv_[i] = val;
225 | if (computeVectors && (val > 0)) { S_.set(i, i, 1 / val); }
226 | }
227 |
228 | if (computeVectors) {
229 | S_.multiply(evd_.getV().transpose()).multiply(A, Vt_);
230 | }
231 | }
232 | }
233 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/matrix/Matrix.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.LS;
23 |
24 | import org.ojalgo.matrix.store.Primitive64Store;
25 |
26 | import org.apache.datasketches.memory.Memory;
27 | import org.apache.datasketches.vector.MatrixFamily;
28 |
29 | /**
30 | * Provides an implementation-agnostic wrapper around Matrix classes.
31 | *
32 | * @author Jon Malkin
33 | */
34 | public abstract class Matrix {
35 | int numRows_;
36 | int numCols_;
37 |
38 | /**
39 | * Loads matrix from srcMem, assuming storage in column-major order to ensure portability.
40 | * Does not necessarily encode matrix size; do not expect size checks based on passed-in
41 | * parameters.
42 | *
43 | * @param srcMem Memory wrapping the matrix
44 | * @param type Matrix implementation type to use
45 | * @return The heapified matrix
46 | */
47 | public static Matrix heapify(final Memory srcMem, final MatrixType type) {
48 | switch (type) {
49 | case OJALGO:
50 | return MatrixImplOjAlgo.heapifyInstance(srcMem);
51 | default:
52 | return null;
53 | }
54 | }
55 |
56 | /**
57 | * Wraps an object without allocating memory. This method will throw an exception if the mtx
58 | * Object is not of the same type as the implementing class's native format.
59 | * @param mtx Matrix object to wrap
60 | * @return A Matrix object
61 | */
62 | public static Matrix wrap(final Object mtx) {
63 | if (mtx == null) {
64 | return null;
65 | } else if (mtx instanceof Primitive64Store) {
66 | return MatrixImplOjAlgo.wrap((Primitive64Store) mtx);
67 | }
68 | else {
69 | throw new IllegalArgumentException("wrap() does not currently support "
70 | + mtx.getClass().toString());
71 | }
72 | }
73 |
74 | /**
75 | * Gets a builder to be able to create instances of Matrix objects
76 | * @return a MatrixBuilder object
77 | */
78 | public static MatrixBuilder builder() {
79 | return new MatrixBuilder();
80 | }
81 |
82 | /**
83 | * Returns the raw data object backing this Matrix, as an Object. Must be cast to the
84 | * appropriate type (assuming knowledge of the implementation) to be used.
85 | * @return An Object pointing to the raw data backing this Matrix
86 | */
87 | public abstract Object getRawObject();
88 |
89 | /**
90 | * Serializes the Matrix in a custom format as a byte array
91 | * @return A byte[] conttaining a serialized Matrix
92 | */
93 | public abstract byte[] toByteArray();
94 |
95 | /**
96 | * Serializes a sub-Matrix by storing only the first numRows and numCols rows and columns,
97 | * respsectively.
98 | * @param numRows Number of rows to write
99 | * @param numCols Number of columns to write
100 | * @return A byte[] containing the serialized sub-Matrix.
101 | */
102 | public abstract byte[] toCompactByteArray(int numRows, int numCols);
103 |
104 | /**
105 | * Returns a single element from the Matrix
106 | * @param row Row index of target element (0-based)
107 | * @param col Column index of target elemtn (0-based)
108 | * @return Matrix value at (row, column)
109 | */
110 | public abstract double getElement(int row, int col);
111 |
112 | /**
113 | * Returns a copy of an entire row of the Matrix
114 | * @param row Row index to return (0-based)
115 | * @return A double[] representing the Matrix row
116 | */
117 | public abstract double[] getRow(int row);
118 |
119 | /**
120 | * Returns a copy of an entire column of the Matrix
121 | * @param col Column index to return (0-based)
122 | * @return A double[] representing the Matrix column
123 | */
124 | public abstract double[] getColumn(int col);
125 |
126 | /**
127 | * Sets a single element inthe Matrix
128 | * @param row Row index of target element (0-based)
129 | * @param col Column index of target element (0-based)
130 | * @param value The value to insert into the Matrix at (row, column)
131 | */
132 | public abstract void setElement(int row, int col, double value);
133 |
134 | /**
135 | * Sets an entire row of the Matrix, by copying data from the input
136 | * @param row Target row index (0-based)
137 | * @param values Array of values to write into the Matrix
138 | */
139 | public abstract void setRow(int row, double[] values);
140 |
141 | /**
142 | * Sets an entire column of the Matrix, by copying data from the input
143 | * @param column Target column index (0-based)
144 | * @param values Array of values to write into the Matrix
145 | */
146 | public abstract void setColumn(int column, double[] values);
147 |
148 | /**
149 | * Gets the number of rows in the Matrix
150 | * @return Configured number of rows in the Matrix
151 | */
152 | public long getNumRows() { return numRows_; }
153 |
154 | /**
155 | * Gets the number of columns in the Matrix
156 | * @return Configured number of columns in the Matrix
157 | */
158 | public long getNumColumns() { return numCols_; }
159 |
160 | /**
161 | * Gets serialized size of the Matrix, in bytes.
162 | * @return Number of bytes needed for a serialized Matrix
163 | */
164 | public int getSizeBytes() {
165 | final int preBytes = MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES;
166 | final int mtxBytes = (numRows_ * numCols_) * Double.BYTES;
167 | return preBytes + mtxBytes;
168 | }
169 |
170 | /**
171 | * Gets serialized size of the Matrix in compact form, in bytes.
172 | * @param rows Number of rows to select for writing
173 | * @param cols Number of columns to select for writing
174 | * @return Number of bytes needed to serialize the first (rows, cols) of this Matrix
175 | */
176 | public int getCompactSizeBytes(final int rows, final int cols) {
177 | final int nRows = Math.min(rows, numRows_);
178 | final int nCols = Math.min(cols, numCols_);
179 |
180 | if ((nRows < 1) || (nCols < 1)) {
181 | return MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES;
182 | } else if ((nRows == numRows_) && (nCols == numCols_)) {
183 | return getSizeBytes();
184 | }
185 |
186 | final int preBytes = MatrixFamily.MATRIX.getMaxPreLongs() * Long.BYTES;
187 | final int mtxBytes = (nRows * nCols) * Double.BYTES;
188 | return preBytes + mtxBytes;
189 | }
190 |
191 | /**
192 | * Writes information about this Matrix to a String.
193 | * @return A human-readable representation of a Matrix
194 | */
195 | @Override
196 | public String toString() {
197 | final StringBuilder sb = new StringBuilder();
198 |
199 | sb.append(" Matrix data :").append(LS);
200 | sb.append(this.getClass().getName());
201 | sb.append(" < ").append(numRows_).append(" x ").append(numCols_).append(" >");
202 |
203 | // First element
204 | sb.append("\n{ { ").append(getElement(0, 0));
205 |
206 | // Rest of the first row
207 | for (int j = 1; j < numCols_; j++) {
208 | sb.append(",\t").append(getElement(0, j));
209 | }
210 |
211 | // For each of the remaining rows
212 | for (int i = 1; i < numRows_; i++) {
213 |
214 | // First column
215 | sb.append(" },\n{ ").append(getElement(i, 0));
216 |
217 | // Remaining columns
218 | for (int j = 1; j < numCols_; j++) {
219 | sb.append(",\t").append(getElement(i, j));
220 | }
221 | }
222 |
223 | // Finish
224 | sb.append(" } }").append(LS);
225 |
226 | return sb.toString();
227 | }
228 |
229 | /**
230 | * Gets the matrix type
231 | * @return the matrix type
232 | */
233 | public abstract MatrixType getMatrixType();
234 | }
235 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/matrix/MatrixImplOjAlgo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.COMPACT_FLAG_MASK;
23 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractFamilyID;
24 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractFlags;
25 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumColumns;
26 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumColumnsUsed;
27 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumRows;
28 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractNumRowsUsed;
29 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractPreLongs;
30 | import static org.apache.datasketches.vector.matrix.MatrixPreambleUtil.extractSerVer;
31 |
32 | import org.ojalgo.matrix.store.Primitive64Store;
33 |
34 | import org.apache.datasketches.memory.Memory;
35 | import org.apache.datasketches.memory.WritableMemory;
36 | import org.apache.datasketches.vector.MatrixFamily;
37 |
38 | /**
39 | * Implements the ojAlgo Matrix operations.
40 | */
41 | public final class MatrixImplOjAlgo extends Matrix {
42 | private Primitive64Store mtx_;
43 |
44 | private MatrixImplOjAlgo(final int numRows, final int numCols) {
45 | mtx_ = Primitive64Store.FACTORY.make(numRows, numCols);
46 | numRows_ = numRows;
47 | numCols_ = numCols;
48 | }
49 |
50 | private MatrixImplOjAlgo(final Primitive64Store mtx) {
51 | mtx_ = mtx;
52 | numRows_ = (int) mtx.countRows();
53 | numCols_ = (int) mtx.countColumns();
54 | }
55 |
56 | static Matrix newInstance(final int numRows, final int numCols) {
57 | return new MatrixImplOjAlgo(numRows, numCols);
58 | }
59 |
60 | static Matrix heapifyInstance(final Memory srcMem) {
61 | final int minBytes = MatrixFamily.MATRIX.getMinPreLongs() * Long.BYTES;
62 | final long memCapBytes = srcMem.getCapacity();
63 | if (memCapBytes < minBytes) {
64 | throw new IllegalArgumentException("Source Memory too small: " + memCapBytes
65 | + " < " + minBytes);
66 | }
67 |
68 | final int preLongs = extractPreLongs(srcMem);
69 | final int serVer = extractSerVer(srcMem);
70 | final int familyID = extractFamilyID(srcMem);
71 |
72 | if (serVer != 1) {
73 | throw new IllegalArgumentException("Invalid SerVer reading srcMem. Expected 1, found: "
74 | + serVer);
75 | }
76 | if (familyID != MatrixFamily.MATRIX.getID()) {
77 | throw new IllegalArgumentException("srcMem does not point to a Matrix");
78 | }
79 |
80 | final int flags = extractFlags(srcMem);
81 | final boolean isCompact = (flags & COMPACT_FLAG_MASK) > 0;
82 |
83 | int nRows = extractNumRows(srcMem);
84 | int nCols = extractNumColumns(srcMem);
85 |
86 | final MatrixImplOjAlgo matrix = new MatrixImplOjAlgo(nRows, nCols);
87 | if (isCompact) {
88 | nRows = extractNumRowsUsed(srcMem);
89 | nCols = extractNumColumnsUsed(srcMem);
90 | }
91 |
92 | int memOffset = preLongs * Long.BYTES;
93 | for (int c = 0; c < nCols; ++c) {
94 | for (int r = 0; r < nRows; ++r) {
95 | matrix.mtx_.set(r, c, srcMem.getDouble(memOffset));
96 | memOffset += Double.BYTES;
97 | }
98 | }
99 |
100 | return matrix;
101 | }
102 |
103 | static Matrix wrap(final Primitive64Store mtx) {
104 | return new MatrixImplOjAlgo(mtx);
105 | }
106 |
107 | @Override
108 | public Object getRawObject() {
109 | return mtx_;
110 | }
111 |
112 | @Override
113 | public byte[] toByteArray() {
114 | final int preLongs = 2;
115 | final long numElements = mtx_.count();
116 | assert numElements == (mtx_.countColumns() * mtx_.countRows());
117 |
118 | final int outBytes = (int) (((long)preLongs * Long.BYTES) + (numElements * Double.BYTES));
119 | final byte[] outByteArr = new byte[outBytes];
120 | final WritableMemory memOut = WritableMemory.wrap(outByteArr);
121 | final Object memObj = memOut.getArray();
122 | final long memAddr = memOut.getCumulativeOffset(0L);
123 |
124 | MatrixPreambleUtil.insertPreLongs(memObj, memAddr, preLongs);
125 | MatrixPreambleUtil.insertSerVer(memObj, memAddr, MatrixPreambleUtil.SER_VER);
126 | MatrixPreambleUtil.insertFamilyID(memObj, memAddr, MatrixFamily.MATRIX.getID());
127 | MatrixPreambleUtil.insertFlags(memObj, memAddr, 0);
128 | MatrixPreambleUtil.insertNumRows(memObj, memAddr, (int) mtx_.countRows());
129 | MatrixPreambleUtil.insertNumColumns(memObj, memAddr, (int) mtx_.countColumns());
130 | memOut.putDoubleArray(preLongs << 3, mtx_.data, 0, (int) numElements);
131 |
132 | return outByteArr;
133 | }
134 |
135 | @Override
136 | public byte[] toCompactByteArray(final int numRows, final int numCols) {
137 | // TODO: row/col limit checks
138 |
139 | final int preLongs = 3;
140 |
141 | // for non-compact we can do an array copy, so save as non-compact if using the entire matrix
142 | final long numElements = (long) numRows * numCols;
143 | final boolean isCompact = numElements < mtx_.count();
144 | if (!isCompact) {
145 | return toByteArray();
146 | }
147 |
148 | assert numElements < mtx_.count();
149 |
150 | final int outBytes = (int) (((long)preLongs * Long.BYTES) + (numElements * Double.BYTES));
151 | final byte[] outByteArr = new byte[outBytes];
152 | final WritableMemory memOut = WritableMemory.wrap(outByteArr);
153 | final Object memObj = memOut.getArray();
154 | final long memAddr = memOut.getCumulativeOffset(0L);
155 |
156 | MatrixPreambleUtil.insertPreLongs(memObj, memAddr, preLongs);
157 | MatrixPreambleUtil.insertSerVer(memObj, memAddr, MatrixPreambleUtil.SER_VER);
158 | MatrixPreambleUtil.insertFamilyID(memObj, memAddr, MatrixFamily.MATRIX.getID());
159 | MatrixPreambleUtil.insertFlags(memObj, memAddr, COMPACT_FLAG_MASK);
160 | MatrixPreambleUtil.insertNumRows(memObj, memAddr, (int) mtx_.countRows());
161 | MatrixPreambleUtil.insertNumColumns(memObj, memAddr, (int) mtx_.countColumns());
162 | MatrixPreambleUtil.insertNumRowsUsed(memObj, memAddr, numRows);
163 | MatrixPreambleUtil.insertNumColumnsUsed(memObj, memAddr, numCols);
164 |
165 | // write elements in column-major order
166 | long offsetBytes = (long)preLongs * Long.BYTES;
167 | for (int c = 0; c < numCols; ++c) {
168 | for (int r = 0; r < numRows; ++r) {
169 | memOut.putDouble(offsetBytes, mtx_.doubleValue(r, c));
170 | offsetBytes += Double.BYTES;
171 | }
172 | }
173 |
174 | return outByteArr;
175 | }
176 |
177 | @Override
178 | public double getElement(final int row, final int col) {
179 | return mtx_.doubleValue(row, col);
180 | }
181 |
182 | @Override
183 | public double[] getRow(final int row) {
184 | final int cols = (int) mtx_.countColumns();
185 | final double[] result = new double[cols];
186 | for (int c = 0; c < cols; ++c) {
187 | result[c] = mtx_.doubleValue(row, c);
188 | }
189 | return result;
190 | }
191 |
192 | @Override
193 | public double[] getColumn(final int col) {
194 | final int rows = (int) mtx_.countRows();
195 | final double[] result = new double[rows];
196 | for (int r = 0; r < rows; ++r) {
197 | result[r] = mtx_.doubleValue(r, col);
198 | }
199 | return result;
200 | }
201 |
202 | @Override
203 | public void setElement(final int row, final int col, final double value) {
204 | mtx_.set(row, col, value);
205 | }
206 |
207 | @Override
208 | public void setRow(final int row, final double[] values) {
209 | if (values.length != mtx_.countColumns()) {
210 | throw new IllegalArgumentException("Invalid number of elements for row. Expected "
211 | + mtx_.countColumns() + ", found " + values.length);
212 | }
213 |
214 | for (int i = 0; i < mtx_.countColumns(); ++i) {
215 | mtx_.set(row, i, values[i]);
216 | }
217 | }
218 |
219 | @Override
220 | public void setColumn(final int column, final double[] values) {
221 | if (values.length != mtx_.countRows()) {
222 | throw new IllegalArgumentException("Invalid number of elements for column. Expected "
223 | + mtx_.countRows() + ", found " + values.length);
224 | }
225 |
226 | for (int i = 0; i < mtx_.countRows(); ++i) {
227 | mtx_.set(i, column, values[i]);
228 | }
229 | }
230 |
231 | @Override
232 | public MatrixType getMatrixType() {
233 | return MatrixType.OJALGO;
234 | }
235 |
236 | }
237 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/matrix/MatrixPreambleUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.matrix;
21 |
22 | import static org.apache.datasketches.memory.UnsafeUtil.unsafe;
23 |
24 | import org.apache.datasketches.memory.Memory;
25 | import org.apache.datasketches.vector.MatrixFamily;
26 |
27 | /**
28 | * This class defines the preamble items structure and provides basic utilities for some of the
29 | * key fields for a Matrix
30 | *
31 | *
32 | * The low significance bytes of this long items structure are on the right. Multi-byte
33 | * integers (int and long) are stored in native byte order. All byte
34 | * values are treated as unsigned.
35 | *
36 | *
An empty or non-compact Matrix requires 16 bytes. A compact under-full matrix requires
37 | * 24 bytes of preamble.
51 | *
52 | * @author Jon Malkin
53 | */
54 | @SuppressWarnings("restriction")
55 | public final class MatrixPreambleUtil {
56 |
57 | /**
58 | * The java line separator character as a String.
59 | */
60 | static final String LS = System.getProperty("line.separator");
61 |
62 | private MatrixPreambleUtil() {}
63 |
64 | // ###### DO NOT MESS WITH THIS FROM HERE ...
65 | // Preamble byte Addresses
66 | private static final int PREAMBLE_LONGS_BYTE = 0;
67 | private static final int SER_VER_BYTE = 1;
68 | private static final int FAMILY_BYTE = 2;
69 | private static final int FLAGS_BYTE = 3;
70 | private static final int NUM_ROWS_INT = 8;
71 | private static final int NUM_COLUMNS_INT = 12;
72 | private static final int ROWS_USED_INT = 16;
73 | private static final int COLS_USED_INT = 20;
74 |
75 | // flag bit masks
76 | //static final int EMPTY_FLAG_MASK = 4;
77 | static final int COMPACT_FLAG_MASK = 8;
78 |
79 | // Other constants
80 | static final int SER_VER = 1;
81 |
82 | /**
83 | * Returns a human readable string summary of the preamble state of the given Memory.
84 | * Note: other than making sure that the given Memory size is large
85 | * enough for just the preamble, this does not do much value checking of the contents of the
86 | * preamble as this is primarily a tool for debugging the preamble visually.
87 | *
88 | * @param mem the given Memory.
89 | * @return the summary preamble string.
90 | */
91 | public static String preambleToString(final Memory mem) {
92 |
93 | final int preLongs = getAndCheckPreLongs(mem); // make sure we can get the assumed preamble
94 | final MatrixFamily family = MatrixFamily.idToFamily(extractFamilyID(mem));
95 |
96 | final int serVer = extractSerVer(mem);
97 | if (serVer != SER_VER) {
98 | throw new IllegalArgumentException("Invalid serialization version in memory region. "
99 | + "Found: " + serVer);
100 | }
101 |
102 | final int flags = extractFlags(mem);
103 | final String flagsStr = Integer.toBinaryString(flags) + ", " + flags;
104 | //final boolean isEmpty = (flags & EMPTY_FLAG_MASK) > 0;
105 | final boolean isCompact = (flags & COMPACT_FLAG_MASK) > 0;
106 |
107 | final int numRows = extractNumRows(mem);
108 | final int numCols = extractNumColumns(mem);
109 |
110 | int numRowsUsed = numRows;
111 | int numColsUsed = numCols;
112 | if (isCompact) {
113 | numRowsUsed = extractNumRowsUsed(mem);
114 | numColsUsed = extractNumColumnsUsed(mem);
115 | }
116 |
117 | final StringBuilder sb = new StringBuilder();
118 | sb.append(LS)
119 | .append("### START ")
120 | .append(family.getFamilyName().toUpperCase())
121 | .append(" PREAMBLE SUMMARY").append(LS)
122 | .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
123 | .append("Byte 1: Serialization Version: ").append(serVer).append(LS)
124 | .append("Byte 2: Family : ").append(family.toString()).append(LS)
125 | .append("Byte 3: Flags Field : ").append(flagsStr).append(LS)
126 | //.append(" EMPTY : ").append(isEmpty).append(LS)
127 | .append(" COMPACT : ").append(isCompact).append(LS)
128 | .append("Bytes 8-11: Num Rows : ").append(numRows).append(LS)
129 | .append("Bytes 12-15: Num Columns : ").append(numCols).append(LS);
130 |
131 | if (isCompact) {
132 | sb.append("Bytes 16-23: Num Rows Used : ").append(numRowsUsed).append(LS);
133 | sb.append("Bytes 24-31: Num Columns Used : ").append(numColsUsed).append(LS);
134 | }
135 |
136 | return sb.toString();
137 | }
138 |
139 | // Extraction methods
140 |
141 | static int extractPreLongs(final Memory mem) {
142 | return mem.getInt(PREAMBLE_LONGS_BYTE) & 0xFF;
143 | }
144 |
145 | static int extractSerVer(final Memory mem) {
146 | return mem.getInt(SER_VER_BYTE) & 0xFF;
147 | }
148 |
149 | static int extractFamilyID(final Memory mem) {
150 | return mem.getByte(FAMILY_BYTE) & 0xFF;
151 | }
152 |
153 | static int extractFlags(final Memory mem) {
154 | return mem.getByte(FLAGS_BYTE) & 0xFF;
155 | }
156 |
157 | static int extractNumRows(final Memory mem) {
158 | return mem.getInt(NUM_ROWS_INT);
159 | }
160 |
161 | static int extractNumColumns(final Memory mem) {
162 | return mem.getInt(NUM_COLUMNS_INT);
163 | }
164 |
165 | static int extractNumRowsUsed(final Memory mem) {
166 | return mem.getInt(ROWS_USED_INT);
167 | }
168 |
169 | static int extractNumColumnsUsed(final Memory mem) {
170 | return mem.getInt(COLS_USED_INT);
171 | }
172 |
173 | // Insertion methods
174 |
175 | static void insertPreLongs(final Object memObj, final long memAddr, final int preLongs) {
176 | unsafe.putByte(memObj, memAddr + PREAMBLE_LONGS_BYTE, (byte) preLongs);
177 | }
178 |
179 | static void insertSerVer(final Object memObj, final long memAddr, final int serVer) {
180 | unsafe.putByte(memObj, memAddr + SER_VER_BYTE, (byte) serVer);
181 | }
182 |
183 | static void insertFamilyID(final Object memObj, final long memAddr, final int matrixFamId) {
184 | unsafe.putByte(memObj, memAddr + FAMILY_BYTE, (byte) matrixFamId);
185 | }
186 |
187 | static void insertFlags(final Object memObj, final long memAddr, final int flags) {
188 | unsafe.putByte(memObj, memAddr + FLAGS_BYTE, (byte) flags);
189 | }
190 |
191 | static void insertNumRows(final Object memObj, final long memAddr, final int numRows) {
192 | unsafe.putInt(memObj, memAddr + NUM_ROWS_INT, numRows);
193 | }
194 |
195 | static void insertNumColumns(final Object memObj, final long memAddr, final int numColumns) {
196 | unsafe.putInt(memObj, memAddr + NUM_COLUMNS_INT, numColumns);
197 | }
198 |
199 | static void insertNumRowsUsed(final Object memObj, final long memAddr, final int rowsUsed) {
200 | unsafe.putInt(memObj, memAddr + ROWS_USED_INT, rowsUsed);
201 | }
202 |
203 | static void insertNumColumnsUsed(final Object memObj, final long memAddr, final int columnsUsed) {
204 | unsafe.putInt(memObj, memAddr + COLS_USED_INT, columnsUsed);
205 | }
206 |
207 |
208 | /**
209 | * Checks Memory for capacity to hold the preamble and returns the extracted preLongs.
210 | * @param mem the given Memory
211 | * @return the extracted prelongs value.
212 | */
213 | private static int getAndCheckPreLongs(final Memory mem) {
214 | final long cap = mem.getCapacity();
215 | if (cap < Long.BYTES) { throwNotBigEnough(cap, Long.BYTES); }
216 | final int preLongs = extractPreLongs(mem);
217 | final int required = Math.max(preLongs << 3, Long.BYTES);
218 | if (cap < required) { throwNotBigEnough(cap, required); }
219 | return preLongs;
220 | }
221 |
222 | private static void throwNotBigEnough(final long cap, final int required) {
223 | throw new IllegalArgumentException(
224 | "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap
225 | + ", Required: " + required);
226 | }
227 | }
228 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/vector/decomposition/PreambleUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.vector.decomposition;
21 |
22 | import static org.apache.datasketches.memory.UnsafeUtil.unsafe;
23 |
24 | import org.apache.datasketches.memory.Memory;
25 | import org.apache.datasketches.vector.MatrixFamily;
26 |
27 | /**
28 | * This class defines the preamble items structure and provides basic utilities for some of the key fields.
29 | *
30 | *
31 | * The low significance bytes of this long items structure are on the right. Multi-byte
32 | * integers (int and long) are stored in native byte order. All byte
33 | * values are treated as unsigned.
34 | *
35 | *
An empty Frequent Directions sketch requires 16 bytes. A non-empty sketch requires 32 bytes
36 | * of preamble. The matrix is dense and is expected to dominate storage.