13 | 1. Put the `adult.data` and `adult.test` files in a `raw` folder in the project root.
14 | 1. Run `RawData2DB.java`.
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_CENSUS_INCOME.*` files into the `/usr/local/Datasets` folder. There should be 3 files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
--------------------------------------------------------------------------------
/datasets-census-income/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | 4.0.0
5 |
6 |
7 | org.knowm.datasets
8 | datasets-parent
9 | 2.2.0-SNAPSHOT
10 |
11 |
12 | datasets-census-income
13 |
14 | Knowm Datasets for machine learning applications
15 | http://knowm.org/open-source/
16 | 2013
17 |
18 |
19 | Knowm Inc.
20 | http://knowm.org
21 |
22 |
23 |
24 |
25 | org.knowm.datasets
26 | datasets-common
27 | 2.2.0-SNAPSHOT
28 |
29 |
30 | org.hsqldb
31 | hsqldb
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/datasets-census-income/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_CENSUS_INCOME;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-census-income/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE CENSUS_INCOME (id INTEGER NOT NULL, age INTEGER NOT NULL, workclass VARCHAR(256) NOT NULL, fnlwgt INTEGER NOT NULL, education VARCHAR(256) NOT NULL, educationNum INTEGER NOT NULL, maritalStatus VARCHAR(256) NOT NULL, occupation VARCHAR(256) NOT NULL, relationship VARCHAR(256) NOT NULL, race VARCHAR(256) NOT NULL, sex VARCHAR(256) NOT NULL, capitalGain INTEGER NOT NULL, capitalLoss INTEGER NOT NULL, hoursPerWeek INTEGER NOT NULL, nativeCountry VARCHAR(256) NOT NULL, incomeLessThan50k TINYINT NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-census-income/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE CENSUS_INCOME (id INTEGER NOT NULL, age INTEGER NOT NULL, workclass VARCHAR(256) NOT NULL, fnlwgt INTEGER NOT NULL, education VARCHAR(256) NOT NULL, educationNum INTEGER NOT NULL, maritalStatus VARCHAR(256) NOT NULL, occupation VARCHAR(256) NOT NULL, relationship VARCHAR(256) NOT NULL, race VARCHAR(256) NOT NULL, sex VARCHAR(256) NOT NULL, capitalGain INTEGER NOT NULL, capitalLoss INTEGER NOT NULL, hoursPerWeek INTEGER NOT NULL, nativeCountry VARCHAR(256) NOT NULL, incomeLessThan50k TINYINT NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-census-income/src/test/java/org/knowm/datasets/censusincome/TestCensusIncomeDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam
5 | * LLC (http://xeiam.com) and contributors.
6 | *
7 | *
Permission is hereby granted, free of charge, to any person obtaining a copy of this software
8 | * and associated documentation files (the "Software"), to deal in the Software without restriction,
9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute,
10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
11 | * furnished to do so, subject to the following conditions:
12 | *
13 | *
The above copyright notice and this permission notice shall be included in all copies or
14 | * substantial portions of the Software.
15 | *
16 | *
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | */
22 | /**
23 | * This product currently only contains code developed by authors of specific components, as
24 | * identified by the source code files.
25 | *
26 | *
Since product implements StAX API, it has dependencies to StAX API classes.
27 | *
28 | *
For additional credits (generally to people who reported problems) see CREDITS file.
29 | */
30 | package org.knowm.datasets.censusincome;
31 |
32 | import static org.hamcrest.CoreMatchers.equalTo;
33 | import static org.hamcrest.MatcherAssert.assertThat;
34 |
35 | import org.junit.AfterClass;
36 | import org.junit.BeforeClass;
37 | import org.junit.Ignore;
38 | import org.junit.Test;
39 | import org.knowm.yank.PropertiesUtils;
40 | import org.knowm.yank.Yank;
41 |
42 | /** @author timmolter */
43 | @Ignore
44 | public class TestCensusIncomeDAO {
45 |
46 | @BeforeClass
47 | public static void setUpDB() {
48 |
49 | Yank.setupDefaultConnectionPool(
50 | PropertiesUtils.getPropertiesFromClasspath("DB_HSQLDB_FILE.properties"));
51 | }
52 |
53 | @AfterClass
54 | public static void tearDownDB() {
55 |
56 | CensusIncomeDAO.release();
57 | }
58 |
59 | @Test
60 | public void testSelectCount() {
61 |
62 | long count = CensusIncomeDAO.selectCount();
63 | assertThat(count, equalTo(48842L));
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/datasets-cifar10/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets CIFAR-10
2 |
3 | [raw data](http://www.cs.toronto.edu/~kriz/cifar.html)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_CIFAR.*` files into the `/usr/local/Datasets` folder. There should be three files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download raw data from here: http://www.cs.toronto.edu/~kriz/cifar.html
13 | 1. Put all files in a `raw` folder in the project root.
14 | 1. Run `RawData2DB.java`.
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_CIFAR.*` files into the `/usr/local/Datasets` folder. There should be three files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
--------------------------------------------------------------------------------
/datasets-cifar10/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-cifar10
12 |
13 | Knowm Datasets for machine learning applications
14 | http://knowm.org/open-source/
15 | 2013
16 |
17 |
18 | Knowm Inc.
19 | http://knowm.org
20 |
21 |
22 |
23 |
24 | org.knowm.datasets
25 | datasets-common
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.hsqldb
30 | hsqldb
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.cifar10;
36 |
37 | import org.knowm.datasets.common.business.DatasetsDAO;
38 | import org.knowm.yank.Yank;
39 |
40 | /**
41 | * @author timmolter
42 | */
43 | public class CifarDAO extends DatasetsDAO {
44 |
45 | public static void init(String dataFilesDir) {
46 |
47 | String dataFileID = "0ByP7_A9vXm17VERJam9EMm5sTkU";
48 | String propsFileID = "0ByP7_A9vXm17VHIzd1hSNW4zUXc";
49 | String scriptFileID = "0ByP7_A9vXm17eHlzcDJfalNoYkk";
50 |
51 | init("DB_CIFAR", dataFilesDir, dataFileID, propsFileID, scriptFileID, null, true);
52 | }
53 |
54 | public static int dropTable() {
55 |
56 | return Yank.execute("DROP TABLE IF EXISTS CIFAR", null);
57 | }
58 |
59 | public static int getTrainTestSplit() {
60 |
61 | return 50000;
62 | }
63 |
64 | public static int createTable() {
65 |
66 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null);
67 | }
68 |
69 | public static int insert(Cifar CIFAR) {
70 |
71 | Object[] params = new Object[] {
72 |
73 | // @formatter:off
74 | CIFAR.getId(), CIFAR.getLabel(), CIFAR.getImagedata()
75 | // @formatter:on
76 | };
77 | String CIFAR_INSERT = "INSERT INTO CIFAR (id, label, imagedata) VALUES (?, ?, ?)";
78 | return Yank.execute(CIFAR_INSERT, params);
79 |
80 | }
81 |
82 | public static Cifar selectSingle(int id) {
83 |
84 | Object[] params = new Object[] { id };
85 |
86 | String SELECT_SINGLE = "SELECT * FROM CIFAR WHERE id = ?";
87 |
88 | return Yank.queryBean(SELECT_SINGLE, Cifar.class, params);
89 | }
90 |
91 | public static long selectCount() {
92 |
93 | String SELECT_COUNT = "SELECT COUNT(*) FROM CIFAR";
94 |
95 | return Yank.queryScalar(SELECT_COUNT, Long.class, null);
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarManager.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.cifar10;
36 |
37 | import java.io.IOException;
38 |
39 | public class CifarManager {
40 |
41 | private CifarImageFile images;
42 |
43 | /**
44 | * Constructs an instance managing the two given data files. Supports NULL
value for one of the arguments in case reading only one of
45 | * the files (images and labels) is required.
46 | *
47 | * @param imagesFile Can be NULL
. In that case all future operations using that file will fail.
48 | * @throws IOException
49 | */
50 | public CifarManager(String imagesFile) throws IOException {
51 |
52 | if (imagesFile != null) {
53 | images = new CifarImageFile(imagesFile, "r");
54 | }
55 | }
56 |
57 | /**
58 | * Reads the current image.
59 | *
60 | * @return matrix
61 | * @throws IOException
62 | */
63 | public CifarRaw readImage() throws IOException {
64 |
65 | if (images == null) {
66 | throw new IllegalStateException("Images file not initialized.");
67 | }
68 | return images.readImage();
69 | }
70 |
71 | /**
72 | * Set the position to be read.
73 | *
74 | * @param index
75 | */
76 | public void setCurrent(int index) {
77 |
78 | images.setCurrentIndex(index);
79 | }
80 |
81 | /**
82 | * Get the underlying images file as {@link CifarImageFile}.
83 | *
84 | * @return {@link CifarImageFile}.
85 | */
86 | public CifarImageFile getImages() {
87 |
88 | return images;
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarRaw.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.cifar10;
36 |
/**
 * Holder for one raw image: an integer label plus three two-dimensional {@code int} arrays, one
 * per color channel. The arrays are stored and returned by reference, without copying.
 *
 * @author timmolter
 */
public class CifarRaw {

  private final int label;
  private final int[][] redChannel;
  private final int[][] greenChannel;
  private final int[][] blueChannel;

  /**
   * Creates a holder from a label and the three channel arrays.
   *
   * @param label the image label
   * @param redChannel the red channel values
   * @param greenChannel the green channel values
   * @param blueChannel the blue channel values
   */
  public CifarRaw(int label, int[][] redChannel, int[][] greenChannel, int[][] blueChannel) {
    this.blueChannel = blueChannel;
    this.greenChannel = greenChannel;
    this.redChannel = redChannel;
    this.label = label;
  }

  /** @return the label given at construction */
  public int getLabel() {
    return this.label;
  }

  /** @return the red channel array given at construction (same reference) */
  public int[][] getRedChannel() {
    return this.redChannel;
  }

  /** @return the green channel array given at construction (same reference) */
  public int[][] getGreenChannel() {
    return this.greenChannel;
  }

  /** @return the blue channel array given at construction (same reference) */
  public int[][] getBlueChannel() {
    return this.blueChannel;
  }
}
84 |
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarViewer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.cifar10;
36 |
37 | import java.awt.BorderLayout;
38 |
39 | import javax.swing.JFrame;
40 | import javax.swing.JPanel;
41 |
/**
 * A non-resizable window that displays a single panel and exits the JVM when closed.
 *
 * @author timmolter
 */
public class CifarViewer extends JFrame {

  /**
   * Builds the window around the given panel and shows it immediately.
   *
   * @param jPanel the panel placed in the center of the frame
   * @param title the window title
   */
  public CifarViewer(JPanel jPanel, String title) {

    // Content first, then size the frame to fit it.
    this.add(jPanel, BorderLayout.CENTER);
    this.pack();

    // Window behavior; a null reference component centers the frame on screen.
    this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    this.setLocationRelativeTo(null);
    this.setTitle(title);
    this.setResizable(false);
    this.setVisible(true);
  }
}
60 |
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_CIFAR;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE CIFAR (id INTEGER NOT NULL, label INTEGER NOT NULL, imagedata VARCHAR(30000) NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-cifar10/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE CIFAR (id INTEGER NOT NULL, label INTEGER NOT NULL, imagedata VARCHAR(30000) NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-cifar10/src/test/java/org/knowm/datasets/cifar10/TestCifarDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.cifar10;
36 |
37 | import static org.hamcrest.CoreMatchers.equalTo;
38 | import static org.hamcrest.MatcherAssert.assertThat;
39 |
40 | import org.junit.AfterClass;
41 | import org.junit.BeforeClass;
42 | import org.junit.Ignore;
43 | import org.junit.Test;
44 | import org.knowm.datasets.cifar10.CifarDAO;
45 |
46 | /**
47 | * @author timmolter
48 | */
49 | @Ignore
50 | public class TestCifarDAO {
51 |
52 | @BeforeClass
53 | public static void setUpDB() {
54 |
55 | CifarDAO.init(new String[0]);
56 | }
57 |
58 | @AfterClass
59 | public static void tearDownDB() {
60 |
61 | CifarDAO.release();
62 | }
63 |
64 | @Test
65 | public void testSelectCount() {
66 |
67 | long count = CifarDAO.selectCount();
68 | assertThat(count, equalTo(60000L));
69 | }
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/datasets-common/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets Common Code
2 |
3 |
--------------------------------------------------------------------------------
/datasets-common/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | 4.0.0
5 |
6 |
7 | org.knowm.datasets
8 | datasets-parent
9 | 2.2.0-SNAPSHOT
10 |
11 |
12 | datasets-common
13 |
14 | Knowm Datasets - Common Code
15 | http://knowm.org/open-source/
16 | 2013
17 |
18 |
19 | Knowm Inc.
20 | http://knowm.org
21 |
22 |
23 |
24 |
25 | commons-io
26 | commons-io
27 |
28 |
29 | org.knowm
30 | yank
31 |
32 |
33 | commons-beanutils
34 | commons-beanutils
35 |
36 |
37 | org.knowm.xchart
38 | xchart
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/datasets-common/src/main/java/org/knowm/datasets/common/Joiner.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.common;
36 |
37 | import java.util.List;
38 |
/**
 * Joins list elements into a single delimited String.
 *
 * @author timmolter
 */
public class Joiner {

  /**
   * Joins the elements of a list, placing {@code separator} between consecutive elements.
   *
   * <p>Each element is appended with {@link StringBuilder#append(Object)}, so a {@code null}
   * element is rendered as the text {@code "null"}. Empty elements are preserved: joining
   * {@code ["", "a"]} with {@code ","} yields {@code ",a"}.
   *
   * @param separator the text inserted between consecutive elements
   * @param topicsArray the elements to join, in order
   * @return the joined String; empty if the list is empty
   */
  public static String join(String separator, List<?> topicsArray) {

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < topicsArray.size(); i++) {

      // Decide by position rather than by builder length, so that a leading empty element
      // still gets its separator.
      if (i > 0) {
        sb.append(separator);
      }
      sb.append(topicsArray.get(i));
    }
    return sb.toString();
  }
}
65 |
--------------------------------------------------------------------------------
/datasets-common/src/main/java/org/knowm/datasets/common/Splitter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.common;
36 |
37 | import java.util.Arrays;
38 |
/**
 * Splits a delimited String into its parts.
 *
 * @author timmolter
 */
public class Splitter {

  /**
   * Splits a String into an iterable of its parts.
   *
   * <p>The separator is handed to {@link String#split(String)} and is therefore interpreted as a
   * regular expression; as with that method, trailing empty parts are dropped. The result is a
   * fixed-size list view backed by the split array.
   *
   * @param separator the delimiting regular expression
   * @param stringToSplit the text to split; must not be {@code null}
   * @return the parts, in order
   */
  public static Iterable<String> split(String separator, String stringToSplit) {

    return Arrays.asList(stringToSplit.split(separator));
  }
}
61 |
--------------------------------------------------------------------------------
/datasets-common/src/main/java/org/knowm/datasets/common/business/Bean.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.common.business;
36 |
37 | import java.io.Serializable;
38 |
/**
 * Serializable base bean that carries a single integer identifier.
 *
 * Created by timmolter on 5/18/17.
 */
public class Bean implements Serializable {

  /** Identifier of this bean; starts out at the int default of 0. */
  private int id;

  /**
   * @return the identifier currently stored in this bean
   */
  public int getId() {
    return this.id;
  }

  /**
   * @param id the identifier to store in this bean
   */
  public void setId(int id) {
    this.id = id;
  }
}
57 |
--------------------------------------------------------------------------------
/datasets-common/src/test/java/org/knowm/datasets/common/JoinerSplitterTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.common;
36 |
37 | import static org.fest.assertions.api.Assertions.assertThat;
38 |
39 | import java.util.ArrayList;
40 | import java.util.Arrays;
41 | import java.util.Iterator;
42 | import java.util.List;
43 |
44 | import org.junit.Test;
45 |
46 | /**
47 | * @author timmolter
48 | */
49 | public class JoinerSplitterTest {
50 |
51 | @Test
52 | public void test1() {
53 |
54 | String[] stringArray = new String[] { "foo", "bar", "blah" };
55 | List stringList = new ArrayList(Arrays.asList(stringArray));
56 | String csv = Joiner.join(",", stringList);
57 | assertThat(csv).isEqualTo("foo,bar,blah");
58 |
59 | Iterable iterable = Splitter.split(",", csv);
60 | Iterator itr = iterable.iterator();
61 | assertThat(itr.next()).isEqualTo("foo");
62 | assertThat(itr.next()).isEqualTo("bar");
63 | assertThat(itr.next()).isEqualTo("blah");
64 |
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/datasets-common/test/test.csv:
--------------------------------------------------------------------------------
1 | name, age, height
2 | joe, 23, 180.5
--------------------------------------------------------------------------------
/datasets-higgs-boson/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-higgs-boson
12 | higgs-boson
13 |
14 | Knowm Datasets for machine learning applications
15 | http://knowm.org/open-source/
16 | 2013
17 |
18 |
19 | Knowm Inc.
20 | http://knowm.org
21 |
22 |
23 |
24 |
25 | org.knowm.datasets
26 | datasets-common
27 | 2.2.0-SNAPSHOT
28 |
29 |
30 | org.hsqldb
31 | hsqldb
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/datasets-higgs-boson/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_HIGGS_BOSON;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-higgs-boson/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE HIGGS_BOSON (EventId INTEGER NOT NULL, DER_mass_MMC FLOAT NULL, DER_mass_transverse_met_lep FLOAT NULL, DER_mass_vis FLOAT NULL, DER_pt_h FLOAT NULL, DER_deltaeta_jet_jet FLOAT NULL, DER_mass_jet_jet FLOAT NULL, DER_prodeta_jet_jet FLOAT NULL, DER_deltar_tau_lep FLOAT NULL, DER_pt_tot FLOAT NULL, DER_sum_pt FLOAT NULL, DER_pt_ratio_lep_tau FLOAT NULL, DER_met_phi_centrality FLOAT NULL, DER_lep_eta_centrality FLOAT NULL, PRI_tau_pt FLOAT NULL, PRI_tau_eta FLOAT NULL, PRI_tau_phi FLOAT NULL, PRI_lep_pt FLOAT NULL, PRI_lep_eta FLOAT NULL, PRI_lep_phi FLOAT NULL, PRI_met FLOAT NULL, PRI_met_phi FLOAT NULL, PRI_met_sumet FLOAT NULL, PRI_jet_num FLOAT NULL, PRI_jet_leading_pt FLOAT NULL, PRI_jet_leading_eta FLOAT NULL, PRI_jet_leading_phi FLOAT NULL, PRI_jet_subleading_pt FLOAT NULL, PRI_jet_subleading_eta FLOAT NULL, PRI_jet_subleading_phi FLOAT NULL, PRI_jet_all_pt FLOAT NULL, Weight FLOAT NULL, Label VARCHAR(1) NULL, PRIMARY KEY (EventId))
--------------------------------------------------------------------------------
/datasets-higgs-boson/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE HIGGS_BOSON (EventId INTEGER NOT NULL, DER_mass_MMC FLOAT NULL, DER_mass_transverse_met_lep FLOAT NULL, DER_mass_vis FLOAT NULL, DER_pt_h FLOAT NULL, DER_deltaeta_jet_jet FLOAT NULL, DER_mass_jet_jet FLOAT NULL, DER_prodeta_jet_jet FLOAT NULL, DER_deltar_tau_lep FLOAT NULL, DER_pt_tot FLOAT NULL, DER_sum_pt FLOAT NULL, DER_pt_ratio_lep_tau FLOAT NULL, DER_met_phi_centrality FLOAT NULL, DER_lep_eta_centrality FLOAT NULL, PRI_tau_pt FLOAT NULL, PRI_tau_eta FLOAT NULL, PRI_tau_phi FLOAT NULL, PRI_lep_pt FLOAT NULL, PRI_lep_eta FLOAT NULL, PRI_lep_phi FLOAT NULL, PRI_met FLOAT NULL, PRI_met_phi FLOAT NULL, PRI_met_sumet FLOAT NULL, PRI_jet_num FLOAT NULL, PRI_jet_leading_pt FLOAT NULL, PRI_jet_leading_eta FLOAT NULL, PRI_jet_leading_phi FLOAT NULL, PRI_jet_subleading_pt FLOAT NULL, PRI_jet_subleading_eta FLOAT NULL, PRI_jet_subleading_phi FLOAT NULL, PRI_jet_all_pt FLOAT NULL, Weight FLOAT NULL, Label VARCHAR(1) NULL, PRIMARY KEY (EventId))
--------------------------------------------------------------------------------
/datasets-hja-birdsong/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets HJA Birdsong
2 |
3 | [raw data](http://web.engr.oregonstate.edu/~briggsf/kdd2012datasets/hja_birdsong/)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_HJA_BIRDSONG.*` files into the `/usr/local/Datasets` folder. There should be four files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download the raw data from [here](http://web.engr.oregonstate.edu/~briggsf/kdd2012datasets/hja_birdsong/).
13 | 1. Put all files (hierarchy.zip, test-remapped and train-remapped) in a `raw` folder in the project root.
14 | 1. Run `DownloadWavFiles.java` and `RawData2DB.java`.
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_HJA_BIRDSONG.*` files into some folder, which you will point to later in an app using the data such as `/usr/local/Datasets/`. There should be four files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
34 | ## Birdsong Spectrogram Viewer
35 |
36 | See: `org.knowm.datasets.samples.HJABirdsongSpectrogramViewer`. Make sure you've placed the generated/downloaded DB data at the location specified at the line: `HJABirdsongDAO.init("/usr/local/Datasets/"); // setup data`.
37 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-hja-birdsong
12 |
13 | Knowm Datasets for machine learning applications
14 | http://knowm.org/open-source/
15 | 2013
16 |
17 |
18 | Knowm Inc.
19 | http://knowm.org
20 |
21 |
22 |
23 |
24 | org.knowm.datasets
25 | datasets-common
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.hsqldb
30 | hsqldb
31 |
32 |
33 | org.apache.commons
34 | commons-math3
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/com/musicg/dsp/WindowFunction.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (C) 2011 Jacquet Wong
3 | * Copyright (C) 2014 Xeiam
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package com.musicg.dsp;
18 |
/**
 * Window functions generator
 *
 * @author Jacquet Wong
 */
public class WindowFunction {

  /** The window shapes that {@link #generate(WindowType, int)} can produce. */
  public enum WindowType {
    RECTANGULAR, BARTLETT, HANNING, HAMMING, BLACKMAN
  }

  /**
   * Generate a window
   *
   * <p>
   * The HANNING, HAMMING and BLACKMAN branches fill indices {@code 0 .. 2 * (nSamples / 2) - 1};
   * for an odd {@code nSamples} the last element therefore keeps the array default of 0.0.
   *
   * @param windowType shape of the window; any value without its own branch (i.e. RECTANGULAR)
   *        yields an all-ones window
   * @param nSamples size of the window
   * @return window in array, of length {@code nSamples}
   */
  public double[] generate(WindowType windowType, int nSamples) {

    // generate nSamples window function values
    // for index values 0 .. nSamples - 1
    int m = nSamples / 2;
    double r;
    double pi = Math.PI;
    double[] w = new double[nSamples];
    switch (windowType) {
    case BARTLETT: // Bartlett (triangular) window
      for (int n = 0; n < nSamples; n++) {
        // Divide in floating point: the int division used previously truncated every ratio to
        // 0 or 1, flattening the triangle. m is 0 only for a one-sample window, which is
        // defined here as the single value 1.0 instead of dividing by zero.
        w[n] = m == 0 ? 1.0 : 1.0 - (double) Math.abs(n - m) / m;
      }
      break;
    case HANNING: // Hanning window
      r = pi / (m + 1);
      for (int n = -m; n < m; n++) {
        w[m + n] = 0.5f + 0.5f * Math.cos(n * r);
      }
      break;
    case HAMMING: // Hamming window
      r = pi / m;
      for (int n = -m; n < m; n++) {
        w[m + n] = 0.54f + 0.46f * Math.cos(n * r);
      }
      break;
    case BLACKMAN: // Blackman window
      r = pi / m;
      for (int n = -m; n < m; n++) {
        w[m + n] = 0.42f + 0.5f * Math.cos(n * r) + 0.08f * Math.cos(2 * n * r);
      }
      break;
    default: // Rectangular window function
      for (int n = 0; n < nSamples; n++) {
        w[n] = 1.0f;
      }
    }
    return w;
  }
}
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/com/musicg/wave/SpectrogramRender.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (C) 2011 Jacquet Wong
3 | * Copyright (C) 2014 Xeiam
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package com.musicg.wave;
18 |
19 | import java.awt.image.BufferedImage;
20 | import java.io.File;
21 | import java.io.IOException;
22 |
23 | import javax.imageio.ImageIO;
24 |
25 | public class SpectrogramRender {
26 |
27 | /**
28 | * Render a spectrogram of a wave file
29 | *
30 | * @param spectrogram spectrogram object
31 | */
32 | public BufferedImage renderSpectrogram(Spectrogram spectrogram) {
33 |
34 | double[][] spectrogramData = spectrogram.getNormalizedSpectrogramData();
35 |
36 | int width = spectrogramData.length;
37 | int height = spectrogramData[0].length;
38 |
39 | BufferedImage bufferedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
40 | for (int i = 0; i < width; i++) {
41 | for (int j = 0; j < height; j++) {
42 | int value;
43 | value = (int) (spectrogramData[i][j] * 255);
44 | bufferedImage.setRGB(i, j, value << 16 | value << 8 | value);
45 | }
46 | }
47 |
48 | return bufferedImage;
49 |
50 | }
51 |
52 | public void saveSpectrogram(BufferedImage bufferedImage, String filename) throws IOException {
53 |
54 | int dotPos = filename.lastIndexOf(".");
55 | String extension = filename.substring(dotPos + 1);
56 | ImageIO.write(bufferedImage, extension, new File(filename));
57 | }
58 |
59 | /**
60 | * Render a spectrogram of a wave file
61 | *
62 | * @param spectrogram spectrogram object
63 | * @param filename output file
64 | * @throws IOException
65 | * @see RGB graphic rendered
66 | */
67 | public void saveSpectrogram(Spectrogram spectrogram, String filename) throws IOException {
68 |
69 | BufferedImage bufferedImage = renderSpectrogram(spectrogram);
70 | saveSpectrogram(bufferedImage, filename);
71 | }
72 | }
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/dsp/FastFourierTransform.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.dsp;
36 |
37 | import org.apache.commons.math3.complex.Complex;
38 | import org.apache.commons.math3.transform.DftNormalization;
39 | import org.apache.commons.math3.transform.FastFourierTransformer;
40 | import org.apache.commons.math3.transform.TransformType;
41 |
42 | /**
43 | * FFT object, transform amplitudes to frequency intensities
44 | */
45 | public class FastFourierTransform {
46 |
47 | /**
48 | * Get the frequency intensities
49 | *
50 | * @param amplitudes amplitudes of the signal
51 | */
52 | public double[] getMagnitudes(double[] amplitudes) {
53 |
54 | FastFourierTransformer fft = new FastFourierTransformer(DftNormalization.STANDARD);
55 |
56 | Complex[] result = fft.transform(amplitudes, TransformType.FORWARD);
57 |
58 | // convert from complex to magnitude and only keep the positive frequencies as the FTT creates a symmetric result
59 | int positiveSize = result.length / 2;
60 | double[] magnitude = new double[positiveSize];
61 | for (int i = positiveSize; i < result.length; i++) {
62 | magnitude[i - positiveSize] = result[i].abs();
63 | }
64 |
65 | return magnitude;
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/DownloadWavFiles.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import java.io.File;
38 | import java.io.IOException;
39 | import java.net.URL;
40 | import java.util.List;
41 |
42 | import org.apache.commons.io.FileUtils;
43 |
44 | /**
45 | * @author timmolter
46 | */
47 | public class DownloadWavFiles {
48 |
49 | public static void main(String[] args) throws IOException {
50 |
51 | DownloadWavFiles downloadWavFiles = new DownloadWavFiles();
52 | downloadWavFiles.go();
53 | }
54 |
55 | private void go() throws IOException {
56 |
57 | String baseURL = "http://web.engr.oregonstate.edu/~briggsf/kdd2012datasets/hja_birdsong/src_wavs/";
58 |
59 | List wavNameLines = FileUtils.readLines(new File("./raw/id2filename.txt"), "UTF-8");
60 |
61 | for (int i = 0; i < wavNameLines.size(); i++) {
62 |
63 | String wavNameLine = wavNameLines.get(i);
64 | String wavFileName = wavNameLine.substring(wavNameLine.indexOf(",") + 1, wavNameLine.length()) + ".wav";
65 | System.out.println("downloading: " + wavFileName);
66 | URL url = new URL(baseURL + wavFileName);
67 | File wavDir = new File("./raw/wav/" + wavFileName);
68 | org.apache.commons.io.FileUtils.copyURLToFile(url, wavDir, 5000, 10000);
69 | }
70 |
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/HJABirdsongDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import org.knowm.yank.Yank;
38 |
39 | /**
40 | * @author timmolter
41 | */
42 | public class HJABirdsongDAO extends HJABirdsongParentDAO {
43 |
44 | public static int dropTable() {
45 |
46 | return Yank.execute("DROP TABLE IF EXISTS BIRD_SONGS", null);
47 | }
48 |
49 | public static int createTable() {
50 |
51 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null);
52 | }
53 |
54 | public static int insert(HJABirdSong hJABirdSong) {
55 |
56 | Object[] params = new Object[] {
57 |
58 | // @formatter:off
59 | hJABirdSong.getBagid(), hJABirdSong.getLabels(), hJABirdSong.getWavfilename(), hJABirdSong.getWavbytes()
60 | // @formatter:on
61 | };
62 | String BIRD_SONGS_INSERT = "INSERT INTO BIRD_SONGS (bagid, labels, wavfilename, wavbytes) VALUES (?, ?, ?, ?)";
63 | return Yank.execute(BIRD_SONGS_INSERT, params);
64 | }
65 |
66 | public static HJABirdSong selectSingle(int bagid) {
67 |
68 | Object[] params = new Object[] { bagid };
69 |
70 | String SELECT_SINGLE = "SELECT * FROM BIRD_SONGS WHERE bagid = ?";
71 |
72 | return Yank.queryBean(SELECT_SINGLE, HJABirdSong.class, params);
73 | }
74 |
75 | public static long selectCount() {
76 |
77 | String SELECT_COUNT = "SELECT COUNT(*) FROM BIRD_SONGS";
78 |
79 | return Yank.queryScalar(SELECT_COUNT, Long.class, null);
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/HJABirdsongParentDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import org.knowm.datasets.common.business.DatasetsDAO;
38 |
39 | /**
40 | * @author timmolter
41 | */
42 | public class HJABirdsongParentDAO extends DatasetsDAO {
43 |
44 | public static void init(String dataFilesDir) {
45 |
46 | String dataFileID = "0ByP7_A9vXm17SWZJa09fWnFxbGM";
47 | String propsFileID = "0ByP7_A9vXm17RS1NMllKelJ0MlE";
48 | String scriptFileID = "0ByP7_A9vXm17YXlZelRxV01ZdDg";
49 | String lobsFileID = "0ByP7_A9vXm17WDBoS25pMHZmR0E";
50 |
51 | init("DB_HJA_BIRDSONG", dataFilesDir, dataFileID, propsFileID, scriptFileID, lobsFileID, true);
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/RawData2DBTenFold.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import java.io.File;
38 | import java.io.IOException;
39 | import java.util.Iterator;
40 | import java.util.List;
41 |
42 | import org.apache.commons.io.FileUtils;
43 | import org.knowm.datasets.common.Splitter;
44 |
45 | /**
46 | * @author timmolter
47 | */
48 | public class RawData2DBTenFold {
49 |
50 | int idx = 0;
51 |
52 | public static void main(String[] args) throws IOException {
53 |
54 | TenFoldDAO.init(args);
55 |
56 | TenFoldDAO.dropTable();
57 | TenFoldDAO.createTable();
58 |
59 | RawData2DBTenFold dp = new RawData2DBTenFold();
60 | System.out.println("processing TenFold data...");
61 | dp.go("./raw/hja_birdsong_10_fold.txt");
62 |
63 | System.out.println("done.");
64 |
65 | TenFoldDAO.release();
66 | }
67 |
68 | private void go(String dataFile) throws IOException {
69 |
70 | List lines = FileUtils.readLines(new File(dataFile), "UTF-8");
71 |
72 | for (String line : lines) {
73 |
74 | System.out.println(line);
75 | Iterable splitLine = Splitter.split(",", line);
76 | Iterator itr = splitLine.iterator();
77 | String bagid = itr.next();
78 | String fold = itr.next();
79 | try {
80 | TenFold tenFold = new TenFold();
81 | tenFold.setBagid(Integer.parseInt(bagid));
82 | tenFold.setFold(Integer.parseInt(fold));
83 | TenFoldDAO.insert(tenFold);
84 | System.out.println(tenFold.toString());
85 | idx++;
86 | } catch (Exception e) {
87 | // e.printStackTrace();
88 | // eat it. skip first line in file.
89 | }
90 |
91 | }
92 |
93 | System.out.println("Number parsed: " + idx);
94 |
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/TenFold.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import org.knowm.datasets.common.business.Bean;
38 |
39 | /** Bean holding one bag id and the fold number assigned to it.
40 | * @author timmolter
41 | */
42 | public class TenFold extends Bean {
43 |
44 | private int bagid; // exposed through both the Id and the Bagid accessors
45 | private int fold;
46 |
47 | public int getId() {
48 | // Returns the same field as getBagid().
49 | return bagid;
50 | }
51 |
52 | public void setId(int bagid) {
53 | // Writes the same field as setBagid().
54 | this.bagid = bagid;
55 | }
56 |
57 | public int getBagid() {
58 |
59 | return bagid;
60 | }
61 |
62 | public void setBagid(int bagid) {
63 |
64 | this.bagid = bagid;
65 | }
66 |
67 | public int getFold() {
68 |
69 | return fold;
70 | }
71 |
72 | public void setFold(int fold) {
73 |
74 | this.fold = fold;
75 | }
76 |
77 | @Override
78 | public String toString() {
79 | // Format: TenFold [bagid=<bagid>, fold=<fold>]
80 | return "TenFold [bagid=" + bagid + ", fold=" + fold + "]";
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/TenFoldDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import org.knowm.datasets.common.business.DatasetsDAO;
38 | import org.knowm.yank.Yank;
39 |
40 | /** Static data-access helpers for the TEN_FOLD table; every statement is issued through Yank.
41 | * @author timmolter
42 | */
43 | public class TenFoldDAO extends DatasetsDAO {
44 |
45 | public static int dropTable() {
46 | // IF EXISTS keeps the statement valid when the table has not been created yet.
47 | return Yank.execute("DROP TABLE IF EXISTS TEN_FOLD", null);
48 | }
49 |
50 | public static int createTable() {
51 | // Passes a statement key, not SQL text; KEY_CREATE_TABLE_TENFOLD is defined in the SQL_HSQLDB and SQL_MYSQL properties files.
52 | return Yank.executeSQLKey("KEY_CREATE_TABLE_TENFOLD", null);
53 | }
54 |
55 | public static int insert(TenFold tenFold) {
56 | // Parameter order must match the (bagid, fold) column list of the INSERT below.
57 | Object[] params = new Object[] {
58 |
59 | // @formatter:off
60 | tenFold.getBagid(), tenFold.getFold()
61 | // @formatter:on
62 | };
63 | String TEN_FOLD_INSERT = "INSERT INTO TEN_FOLD (bagid, fold) VALUES (?, ?)";
64 | return Yank.execute(TEN_FOLD_INSERT, params);
65 | }
66 |
67 | public static TenFold selectSingle(int bagid) {
68 | // Looks up by bagid, which the CREATE TABLE statements declare as the primary key.
69 | Object[] params = new Object[] { bagid };
70 |
71 | String SELECT_SINGLE = "SELECT * FROM TEN_FOLD WHERE bagid = ?";
72 | // NOTE(review): the result when no row matches depends on Yank.queryBean - presumably null, confirm.
73 | return Yank.queryBean(SELECT_SINGLE, TenFold.class, params);
74 | }
75 |
76 | public static long selectCount() {
77 | // Counts all rows in TEN_FOLD.
78 | String SELECT_COUNT = "SELECT COUNT(*) FROM TEN_FOLD";
79 |
80 | return Yank.queryScalar(SELECT_COUNT, Long.class, null);
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_HJA_BIRDSONG;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE BIRD_SONGS (bagid INTEGER NOT NULL, labels VARCHAR(256) NOT NULL, wavfilename VARCHAR(256) NOT NULL, wavbytes blob NOT NULL, PRIMARY KEY (bagid))
2 | KEY_CREATE_TABLE_TENFOLD=CREATE CACHED TABLE TEN_FOLD (bagid INTEGER NOT NULL, fold INTEGER NOT NULL, PRIMARY KEY (bagid))
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE BIRD_SONGS (bagid INTEGER NOT NULL, labels VARCHAR(256) NOT NULL, wavfilename VARCHAR(256) NOT NULL, wavbytes blob NOT NULL, PRIMARY KEY (bagid))
2 | KEY_CREATE_TABLE_TENFOLD=CREATE TABLE TEN_FOLD (bagid INTEGER NOT NULL, fold INTEGER NOT NULL, PRIMARY KEY (bagid))
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/test/java/org/knowm/datasets/hjabirdsong/TestHJABirdsongDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import static org.hamcrest.CoreMatchers.equalTo;
38 | import static org.hamcrest.MatcherAssert.assertThat;
39 |
40 | import java.sql.SQLException;
41 |
42 | import org.junit.AfterClass;
43 | import org.junit.BeforeClass;
44 | import org.junit.Ignore;
45 | import org.junit.Test;
46 | import org.knowm.datasets.hjabirdsong.HJABirdSong;
47 | import org.knowm.datasets.hjabirdsong.HJABirdsongDAO;
48 |
49 | /** JUnit checks for HJABirdsongDAO; the class-level {@code @Ignore} currently disables all of them.
50 | * @author timmolter
51 | */
52 | @Ignore
53 | public class TestHJABirdsongDAO {
54 |
55 | @BeforeClass
56 | public static void setUpDB() {
57 | // Runs once before the tests of this class; initialises the DAO with an empty argument array.
58 | HJABirdsongDAO.init(new String[0]);
59 |
60 | }
61 |
62 | @AfterClass
63 | public static void tearDownDB() {
64 | // Runs once after the tests of this class.
65 | HJABirdsongDAO.release();
66 | }
67 |
68 | @Test
69 | public void testSelectCount() {
70 | // Expects exactly 548 rows. NOTE(review): presumably needs a pre-populated database - confirm.
71 | long count = HJABirdsongDAO.selectCount();
72 | assertThat(count, equalTo(548L));
73 | }
74 |
75 | @Test
76 | public void testSelect() throws SQLException {
77 | // Pins the labels (raw string and parsed list) and the wav file name returned for bag id 3.
78 | HJABirdSong hJABirdSong = HJABirdsongDAO.selectSingle(3);
79 | assertThat(hJABirdSong.getBagid(), equalTo(3));
80 | assertThat(hJABirdSong.getLabels(), equalTo("1,3"));
81 | assertThat(hJABirdSong.getLabelsAsArray().get(0), equalTo(1));
82 | assertThat(hJABirdSong.getLabelsAsArray().get(1), equalTo(3));
83 | assertThat(hJABirdSong.getWavfilename(), equalTo("PC13_20090531_050000_10.wav"));
84 | // System.out.println(hJABirdSong.getWavbytes().length());
85 | // System.out.println(new String(hJABirdSong.getWavbytes().getBytes(1, 4)));
86 | // assertThat(new String(hJABirdSong.getWavbytes().getBytes(1, 4)), equalTo("test"));
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/datasets-hja-birdsong/src/test/java/org/knowm/datasets/hjabirdsong/TestTenFoldDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.hjabirdsong;
36 |
37 | import static org.hamcrest.CoreMatchers.equalTo;
38 | import static org.hamcrest.MatcherAssert.assertThat;
39 |
40 | import org.junit.AfterClass;
41 | import org.junit.BeforeClass;
42 | import org.junit.Ignore;
43 | import org.junit.Test;
44 |
45 | /** JUnit checks for TenFoldDAO; the class-level {@code @Ignore} currently disables all of them.
46 | * @author timmolter
47 | */
48 | @Ignore
49 | public class TestTenFoldDAO {
50 |
51 | @BeforeClass
52 | public static void setUpDB() {
53 | // Runs once before the tests of this class; initialises the DAO with an empty argument array.
54 | TenFoldDAO.init(new String[0]);
55 | }
56 |
57 | @AfterClass
58 | public static void tearDownDB() {
59 | // Runs once after the tests of this class.
60 | TenFoldDAO.release();
61 | }
62 |
63 | @Test
64 | public void testSelectCount() {
65 | // Expects exactly 548 rows, the same count TestHJABirdsongDAO expects for the bird-song table.
66 | long count = TenFoldDAO.selectCount();
67 | assertThat(count, equalTo(548L));
68 | }
69 |
70 | @Test
71 | public void testSelect() {
72 | // Pins the fold stored for bag id 3 to 7.
73 | TenFold tenFold = TenFoldDAO.selectSingle(3);
74 | assertThat(tenFold.getBagid(), equalTo(3));
75 | assertThat(tenFold.getFold(), equalTo(7));
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/datasets-lshtc4/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets LSHTC4
2 |
3 | [raw data](http://www.kaggle.com/c/lshtc/data)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_LSHTC4.*` files into the `/usr/local/Datasets` folder. There should be three files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download raw data from here: http://www.kaggle.com/c/lshtc/data
13 | 1. Put all files (`hierarchy.zip`, `test-remapped` and `train-remapped`) in a `raw` folder in the project root.
14 | 1. Run `RawData2DB.java` and `RawData2DBHierarchy.java` with program argument `DB_HSQLDB_FILE.properties`.
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_LSHTC4.*` files into the `/usr/local/Datasets` folder. There should be three files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
34 | ## Data Information
35 |
36 | #### Train Sample Row
37 |
38 | Data
39 | 314523, 165538, 76255, 335416, 416827 1:1 2:1 3:1 4:1 5:1 6:1 7:1 8:1 9:1 10:1 11:1 12:2 13:1 14:2 15:1 16:1 17:1 18:1 19:1 20:1 21:1 22:1 23:1 24:5 25:1 26:1 27:1 28:1 29:1 30:3 31:1 32:2 33:1 34:1 35:1 36:3 37:1 38:1 39:1 40:1 41:3 42:1 43:7 44:1 45:1 46:1 47:1 48:1 49:1 50:1 51:2 52:1 53:1 54:1 55:1 56:1 57:3 58:4 59:1 60:2 61:3 62:1 63:1 64:1 65:1 66:1 67:2 68:1 69:1 70:1 71:1 72:1 73:1 74:1 75:2 76:2 77:1 78:1 79:1 80:1 81:1
40 |
41 | #### Test Sample Row
42 |
43 | Id,Data
44 | 1,0 139:1 153:4 199:1 212:1 232:1 282:1 307:3 310:1 428:1 510:1 528:1 609:1 700:2 709:1 727:1 765:1 791:1 798:2 838:1 872:1 1007:1 1170:2 1374:1 1388:1 1409:1 1435:1 1892:1 2190:1 2197:1 2253:1 2348:2 2570:1 2628:1 2713:1 3066:1 3406:1 3619:2 3628:2 3636:1 3649:2 5068:1 8385:1 9371:1 11248:1 11806:1
45 |
46 |
47 | #### Observations
48 |
49 | longestLabelsStringLength: 1344
50 | longestFeaturesStringLength: 47449
51 | highestFeatureID: 1617899
52 | highestFeatureValue: 1700
53 | highestLabelID: 445729
54 |
55 | Train entries: 2,365,437
56 | Test entries: 452,167
57 | Total Entries: 2,817,603
58 |
59 | Total Hierarchy Entries: 863261
60 |
61 |
--------------------------------------------------------------------------------
/datasets-lshtc4/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-lshtc4
12 |
13 | Knowm Datasets for machine learning applications
14 | http://knowm.org/open-source/
15 | 2014
16 |
17 |
18 | Knowm Inc.
19 | http://knowm.org
20 |
21 |
22 |
23 |
24 | org.knowm.datasets
25 | datasets-common
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.hsqldb
30 | hsqldb
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.lshtc4;
36 |
37 | import org.knowm.datasets.common.business.Bean;
38 |
39 | /** Bean for one LSHTC4 row: a labels string and a features string, plus the id reached through the inherited getId().
40 | * @author timmolter
41 | */
42 | public class LSHTC4 extends Bean {
43 |
44 | private String labels;
45 | private String features;
46 |
47 | public String getLabels() {
48 |
49 | return labels;
50 | }
51 |
52 | public String[] getLabelsAsArray() {
53 | // Splits on "," without trimming; throws NullPointerException while labels is still null.
54 | return labels.split(",");
55 | }
56 |
57 | public void setLabels(String labels) {
58 |
59 | this.labels = labels;
60 | }
61 |
62 | public String getFeatures() {
63 |
64 | return features;
65 | }
66 |
67 | public String[] getFeaturesAsArray() {
68 | // NOTE(review): the README's sample rows separate features with spaces - confirm the stored string really uses ",".
69 | return features.split(",");
70 | }
71 |
72 | public void setFeatures(String features) {
73 |
74 | this.features = features;
75 | }
76 |
77 | @Override
78 | public String toString() {
79 | // getId() is not declared in this class; it is inherited.
80 | return "LSHTC4 [id=" + getId() + ", labels=" + labels + ", features=" + features + "]";
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4DAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.lshtc4;
36 |
37 | import org.knowm.yank.Yank;
38 |
39 | /** Static data-access helpers for the LSHTC4 table; every statement is issued through Yank.
40 | * @author timmolter
41 | */
42 | public class LSHTC4DAO extends LSHTC4ParentDAO {
43 |
44 | public static int dropTable() {
45 | // IF EXISTS keeps the statement valid when the table has not been created yet.
46 | return Yank.execute("DROP TABLE IF EXISTS LSHTC4", null);
47 | }
48 |
49 | public static int getTrainTestSplit() {
50 | // 452167 equals the "Test entries" count in this module's README. NOTE(review): how callers apply it as a split point is not visible here.
51 | return 452167;
52 | }
53 |
54 | public static int createTable() {
55 | // Passes a statement key, not SQL text; KEY_CREATE_TABLE is defined in the SQL_HSQLDB and SQL_MYSQL properties files.
56 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null);
57 | }
58 |
59 | public static int insert(LSHTC4 lSHTC4) {
60 | // Parameter order must match the (id, labels, features) column list; the CREATE TABLE statements declare all three NOT NULL.
61 | Object[] params = new Object[] {
62 |
63 | // @formatter:off
64 | lSHTC4.getId(), lSHTC4.getLabels(), lSHTC4.getFeatures()
65 | // @formatter:on
66 | };
67 | String LSHTC4_INSERT = "INSERT INTO LSHTC4 (id, labels, features) VALUES (?, ?, ?)";
68 | return Yank.execute(LSHTC4_INSERT, params);
69 |
70 | }
71 |
72 | public static LSHTC4 selectSingle(int id) {
73 | // Looks up by id, which the CREATE TABLE statements declare as the primary key.
74 | Object[] params = new Object[] { id };
75 |
76 | String SELECT_SINGLE = "SELECT * FROM LSHTC4 WHERE id = ?";
77 | // NOTE(review): the result when no row matches depends on Yank.queryBean - presumably null, confirm.
78 | return Yank.queryBean(SELECT_SINGLE, LSHTC4.class, params);
79 | }
80 |
81 | public static long selectCount() {
82 | // Counts all rows in LSHTC4.
83 | String SELECT_COUNT = "SELECT COUNT(*) FROM LSHTC4";
84 |
85 | return Yank.queryScalar(SELECT_COUNT, Long.class, null);
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4Hierarchy.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.lshtc4;
36 |
37 | import org.knowm.datasets.common.business.Bean;
38 |
39 | /** Bean for one LSHTC4Hierarchy row: a parent id and a node id.
40 | * @author timmolter
41 | */
42 | public class LSHTC4Hierarchy extends Bean {
43 |
44 | int parentid; // NOTE(review): package-private, unlike the private fields of LSHTC4 and TenFold - confirm this is intended
45 | int nodeid; // exposed through both the Id and the Nodeid accessors
46 |
47 | public int getParentid() {
48 |
49 | return parentid;
50 | }
51 |
52 | public void setParentid(int parentid) {
53 |
54 | this.parentid = parentid;
55 | }
56 |
57 | public int getId() {
58 | // Returns the same field as getNodeid().
59 | return nodeid;
60 | }
61 |
62 | public void setId(int nodeid) {
63 | // Writes the same field as setNodeid().
64 | this.nodeid = nodeid;
65 | }
66 |
67 | public int getNodeid() {
68 |
69 | return nodeid;
70 | }
71 |
72 | public void setNodeid(int nodeid) {
73 |
74 | this.nodeid = nodeid;
75 | }
76 |
77 | @Override
78 | public String toString() {
79 | // Format: LSHTC4Hierarchy [parentid=<parentid>, nodeid=<nodeid>]
80 | return "LSHTC4Hierarchy [parentid=" + parentid + ", nodeid=" + nodeid + "]";
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4HierarchyDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.lshtc4;
36 |
37 | import java.util.List;
38 |
39 | import org.knowm.yank.Yank;
40 |
41 | /** Static data-access helpers for the LSHTC4Hierarchy table; every statement is issued through Yank.
42 | * @author timmolter
43 | */
44 | public class LSHTC4HierarchyDAO extends LSHTC4ParentDAO {
45 |
46 | public static int dropTable() {
47 | // IF EXISTS keeps the statement valid when the table has not been created yet.
48 | return Yank.execute("DROP TABLE IF EXISTS LSHTC4Hierarchy", null);
49 | }
50 |
51 | public static int createTable() {
52 | // Passes a statement key, not SQL text; KEY_CREATE_TABLE_HIERARCHY is defined in the SQL_HSQLDB and SQL_MYSQL properties files.
53 | return Yank.executeSQLKey("KEY_CREATE_TABLE_HIERARCHY", null);
54 | }
55 |
56 | public static int insert(LSHTC4Hierarchy lSHTC4Hierarchy) {
57 | // Parameter order must match the (parentid, nodeid) column list of the INSERT below.
58 | Object[] params = new Object[] {
59 |
60 | // @formatter:off
61 | lSHTC4Hierarchy.getParentid(), lSHTC4Hierarchy.getNodeid()
62 | // @formatter:on
63 | };
64 | String LSHTC4Hierarchy_INSERT = "INSERT INTO LSHTC4Hierarchy (parentid, nodeid) VALUES (?, ?)";
65 | return Yank.execute(LSHTC4Hierarchy_INSERT, params);
66 |
67 | }
68 |
69 | public static List selectAll() {
70 | // Unfiltered query: loads every row of the table (the module README lists 863261 hierarchy entries).
71 | String SELECT_ALL = "SELECT * FROM LSHTC4Hierarchy";
72 |
73 | return Yank.queryBeanList(SELECT_ALL, LSHTC4Hierarchy.class, null);
74 | }
75 |
76 | public static LSHTC4Hierarchy selectSingle(int id) {
77 | // The id argument is matched against the nodeid column.
78 | Object[] params = new Object[] { id };
79 |
80 | String SELECT_SINGLE = "SELECT * FROM LSHTC4Hierarchy WHERE nodeid = ?";
81 | // NOTE(review): the CREATE TABLE statements put no key on nodeid, so several rows may match - confirm what Yank.queryBean returns then.
82 | return Yank.queryBean(SELECT_SINGLE, LSHTC4Hierarchy.class, params);
83 | }
84 |
85 | public static long selectCount() {
86 | // Counts all rows in LSHTC4Hierarchy.
87 | String SELECT_COUNT = "SELECT COUNT(*) FROM LSHTC4Hierarchy";
88 |
89 | return Yank.queryScalar(SELECT_COUNT, Long.class, null);
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4ParentDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.lshtc4;
36 |
37 | import org.knowm.datasets.common.business.DatasetsDAO;
38 |
39 | /** Common base of LSHTC4DAO and LSHTC4HierarchyDAO; supplies the init() specific to the DB_LSHTC4 database.
40 | * @author timmolter
41 | */
42 | public class LSHTC4ParentDAO extends DatasetsDAO {
43 |
44 | public static void init(String dataFilesDir) {
45 | // Hard-coded identifiers for the data, properties and script files (presumably Google Drive file ids - confirm).
46 | String dataFileID = "0ByP7_A9vXm17bFdZVzEyMWhsVFk";
47 | String propsFileID = "0ByP7_A9vXm17Zkl0ajF6LXlrYUE";
48 | String scriptFileID = "0ByP7_A9vXm17ZnljLU1ybS16c2c";
49 | // Delegates to a seven-argument init not declared in this class. NOTE(review): the meaning of the trailing null and true is not visible here.
50 | init("DB_LSHTC4", dataFilesDir, dataFileID, propsFileID, scriptFileID, null, true);
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_LSHTC4;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE LSHTC4 (id INTEGER NOT NULL, labels VARCHAR(1344) NOT NULL, features VARCHAR(47449) NOT NULL, PRIMARY KEY (id))
2 | KEY_CREATE_TABLE_HIERARCHY=CREATE CACHED TABLE LSHTC4Hierarchy (parentid INTEGER NOT NULL, nodeid INTEGER NOT NULL)
--------------------------------------------------------------------------------
/datasets-lshtc4/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE LSHTC4 (id INTEGER NOT NULL, labels VARCHAR(1344) NOT NULL, features VARCHAR(47449) NOT NULL, PRIMARY KEY (id))
2 | KEY_CREATE_TABLE_HIERARCHY=CREATE TABLE LSHTC4Hierarchy (parentid INTEGER NOT NULL, nodeid INTEGER NOT NULL)
--------------------------------------------------------------------------------
/datasets-lshtc4/src/test/java/org/knowm/datasets/lshtc4/TestLSHTC4HierarchyDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.lshtc4;
36 |
37 | import static org.hamcrest.CoreMatchers.equalTo;
38 | import static org.hamcrest.MatcherAssert.assertThat;
39 |
40 | import java.util.List;
41 |
42 | import org.junit.AfterClass;
43 | import org.junit.BeforeClass;
44 | import org.junit.Ignore;
45 | import org.junit.Test;
46 | import org.knowm.datasets.lshtc4.LSHTC4Hierarchy;
47 | import org.knowm.datasets.lshtc4.LSHTC4HierarchyDAO;
48 |
49 | /**
50 |  * Integration checks for {@link LSHTC4HierarchyDAO}; skipped by default through {@code @Ignore}.
51 |  *
52 |  * @author timmolter
53 |  */
54 | @Ignore
55 | public class TestLSHTC4HierarchyDAO {
56 |
57 |   /** Row count that both tests expect to find in the hierarchy table. */
58 |   private static final int EXPECTED_ROWS = 863261;
59 |
60 |   @BeforeClass
61 |   public static void setUpDB() {
62 |
63 |     LSHTC4HierarchyDAO.init(new String[0]);
64 |   }
65 |
66 |   @AfterClass
67 |   public static void tearDownDB() {
68 |
69 |     LSHTC4HierarchyDAO.release();
70 |   }
71 |
72 |   @Test
73 |   public void testSelectCount() {
74 |
75 |     long count = LSHTC4HierarchyDAO.selectCount();
76 |     assertThat(count, equalTo((long) EXPECTED_ROWS));
77 |   }
78 |
79 |   @Test
80 |   public void test() {
81 |
82 |     // Typed list instead of a raw List, so element access needs no cast.
83 |     List<LSHTC4Hierarchy> all = LSHTC4HierarchyDAO.selectAll();
84 |     assertThat(all.size(), equalTo(EXPECTED_ROWS));
85 |
86 |     LSHTC4Hierarchy lSHTC4Hierarchy = LSHTC4HierarchyDAO.selectSingle(2214730);
87 |     assertThat(lSHTC4Hierarchy.getParentid(), equalTo(2244783));
88 |   }
89 | }
90 |
--------------------------------------------------------------------------------
/datasets-mnist/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets MNIST
2 |
3 | [raw data](http://yann.lecun.com/exdb/mnist/)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_MNIST.*` files into the `/usr/local/Datasets` folder. There should be four files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download raw data from here: http://yann.lecun.com/exdb/mnist/
13 | 1. Put all files in a `raw` folder in the project root
14 | 1. Unzip all
15 | 1. Run `RawData2DB.java`.
16 | 1. The database containing the data will appear in `db`.
17 | 1. Manually copy the `DB_MNIST.*` files into the `/usr/local/Datasets` folder. There should be four files.
18 |
19 | ## How to Use the Data in Your App
20 |
21 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
22 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
23 |
24 |
25 | try {
26 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
27 | BreastCancerDemo demo = new BreastCancerDemo();
28 | demo.go();
29 | } catch (Exception e) {
30 | e.printStackTrace();
31 | } finally {
32 | BreastCancerDAO.release(); // release data resources
33 | }
34 |
--------------------------------------------------------------------------------
/datasets-mnist/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | 4.0.0
5 |
6 |
7 | org.knowm.datasets
8 | datasets-parent
9 | 2.2.0-SNAPSHOT
10 |
11 |
12 | datasets-mnist
13 |
14 | Knowm Datasets for machine learning applications
15 | http://knowm.org/open-source/
16 | 2013
17 |
18 |
19 | Knowm Inc.
20 | http://knowm.org
21 |
22 |
23 |
24 |
25 | org.knowm.datasets
26 | datasets-common
27 | 2.2.0-SNAPSHOT
28 |
29 |
30 | org.hsqldb
31 | hsqldb
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/java/mnist/tools/MnistLabelFile.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
5 | * this software and associated documentation files (the "Software"), to deal in
6 | * the Software without restriction, including without limitation the rights to
7 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8 | * of the Software, and to permit persons to whom the Software is furnished to do
9 | * so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in all
12 | * copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | * SOFTWARE.
21 | */
22 | package mnist.tools;
23 |
24 | import java.io.FileNotFoundException;
25 | import java.io.IOException;
26 |
27 | /**
28 |  * Reader for an MNIST label file, built on {@link MnistDbFile}.
29 |  */
30 | public class MnistLabelFile extends MnistDbFile {
31 |
32 |   /** Value reported by {@link #getMagicNumber()} for label files. */
33 |   private static final int LABEL_FILE_MAGIC_NUMBER = 2049;
34 |
35 |   /**
36 |    * Opens the named label file by delegating to the superclass constructor.
37 |    *
38 |    * @param name the system-dependent filename
39 |    * @param mode the access mode
40 |    * @throws FileNotFoundException if the superclass constructor throws it
41 |    * @throws IOException if the superclass constructor throws it
42 |    */
43 |   public MnistLabelFile(String name, String mode) throws FileNotFoundException, IOException {
44 |     super(name, mode);
45 |   }
46 |
47 |   /**
48 |    * Reads one unsigned byte at the current position and returns it as the label.
49 |    *
50 |    * @return integer representing the label
51 |    * @throws IOException if the underlying read fails
52 |    */
53 |   public int readLabel() throws IOException {
54 |     return readUnsignedByte();
55 |   }
56 |
57 |   @Override
58 |   protected int getMagicNumber() {
59 |     return LABEL_FILE_MAGIC_NUMBER;
60 |   }
61 | }
62 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/java/org/knowm/datasets/mnist/Mnist.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam
5 | * LLC (http://xeiam.com) and contributors.
6 | *
7 | * <p>Permission is hereby granted, free of charge, to any person obtaining a copy of this software
8 | * and associated documentation files (the "Software"), to deal in the Software without restriction,
9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute,
10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
11 | * furnished to do so, subject to the following conditions:
12 | *
13 | * <p>The above copyright notice and this permission notice shall be included in all copies or
14 | * substantial portions of the Software.
15 | *
16 | * <p>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | */
22 | /**
23 | * This product currently only contains code developed by authors of specific components, as
24 | * identified by the source code files.
25 | *
26 | * <p>Since product implements StAX API, it has dependencies to StAX API classes.
27 | *
28 | * <p>For additional credits (generally to people who reported problems) see CREDITS file.
29 | */
30 | package org.knowm.datasets.mnist;
31 |
32 | import java.awt.image.BufferedImage;
33 | import java.sql.Blob;
34 | import java.sql.SQLException;
35 |
36 | import org.knowm.datasets.common.business.Bean;
37 |
38 | /**
39 |  * Bean for one MNIST record: a label plus a 28x28 image kept as a {@link Blob} and as a byte copy.
40 |  * @author timmolter
41 |  */
42 | public class Mnist extends Bean {
43 |
44 |   /** Width and height of an image in pixels. */
45 |   private static final int SIDE = 28;
46 |
47 |   private int label;
48 |   private Blob imgbytes;
49 |   /** First {@code SIDE * SIDE} bytes of {@link #imgbytes}; {@code null} when no blob has been read. */
50 |   private byte[] imageAsByteArray;
51 |
52 |   public int getLabel() {
53 |     return label;
54 |   }
55 |   public void setLabel(int label) {
56 |     this.label = label;
57 |   }
58 |   public Blob getImgbytes() {
59 |     return imgbytes;
60 |   }
61 |
62 |   /** Stores the blob and re-reads the byte copy; a {@code null} blob or a failed read leaves the copy {@code null}. */
63 |   public void setImgbytes(Blob imgbytes) {
64 |     this.imgbytes = imgbytes;
65 |     this.imageAsByteArray = null; // drop bytes of any previous blob so a failure cannot leave stale data
66 |     try {
67 |       this.imageAsByteArray = (imgbytes == null) ? null : imgbytes.getBytes(1, SIDE * SIDE);
68 |     } catch (SQLException e) {
69 |       e.printStackTrace(); // kept best-effort: the setter still does not throw
70 |     }
71 |   }
72 |
73 |   /** Returns a new matrix indexed {@code [x][y]}; throws IllegalStateException if the image bytes are missing. */
74 |   public byte[][] getImageMatrix() {
75 |     if (imageAsByteArray == null || imageAsByteArray.length < SIDE * SIDE) {
76 |       throw new IllegalStateException("Image bytes are not available; setImgbytes must succeed first");
77 |     }
78 |     byte[][] imageMatrix = new byte[SIDE][SIDE];
79 |     for (int y = 0; y < SIDE; y++) {
80 |       for (int x = 0; x < SIDE; x++) {
81 |         imageMatrix[x][y] = imageAsByteArray[SIDE * y + x]; // source byte index is SIDE * y + x
82 |       }
83 |     }
84 |     return imageMatrix;
85 |   }
86 |
87 |   /** Paints the matrix into a {@code TYPE_INT_RGB} image, writing each byte into all three channels. */
88 |   public BufferedImage getImageAsBufferedImage() {
89 |     byte[][] img = getImageMatrix();
90 |     BufferedImage bufferedImage = new BufferedImage(SIDE, SIDE, BufferedImage.TYPE_INT_RGB);
91 |     for (int x = 0; x < img.length; x++) {
92 |       for (int y = 0; y < img[x].length; y++) {
93 |         // NOTE(review): bytes are signed, so values >= 0x80 sign-extend here; confirm the stored encoding.
94 |         int value = img[x][y] << 16 | img[x][y] << 8 | img[x][y];
95 |         bufferedImage.setRGB(x, y, value);
96 |       }
97 |     }
98 |     return bufferedImage;
99 |   }
100 |
101 |   /** Builds a text rendering: {@code "**"} where the byte exceeds 10, {@code " "} otherwise, one line per first index. */
102 |   public String toASCIIImageString() {
103 |     byte[][] img = getImageMatrix();
104 |     StringBuilder sb = new StringBuilder();
105 |     for (int i = 0; i < img.length; i++) {
106 |       for (int j = 0; j < img[i].length; j++) {
107 |         sb.append(img[i][j] > 10 ? "**" : " ");
108 |       }
109 |       sb.append(System.getProperty("line.separator"));
110 |     }
111 |     return sb.toString();
112 |   }
113 | }
114 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/java/org/knowm/datasets/mnist/MnistDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.mnist;
36 |
37 | import java.util.List;
38 |
39 | import org.knowm.datasets.common.business.DatasetsDAO;
40 | import org.knowm.yank.Yank;
41 |
42 | /**
43 |  * Static data-access helpers for the {@code MNIST} table, issued through {@link Yank}.
44 |  *
45 |  * @author timmolter
46 |  */
47 | public class MnistDAO extends DatasetsDAO {
48 |
49 |   /** Initializes {@code DB_MNIST} by passing {@code dataFilesDir} and this dataset's four file IDs to the seven-argument {@code init}. */
50 |   public static void init(String dataFilesDir) {
51 |
52 |     String dataFileID = "0ByP7_A9vXm17V1NvVzdOY2dURUU";
53 |     String propsFileID = "0ByP7_A9vXm17cGRudjlMQXpTY1U";
54 |     String scriptFileID = "0ByP7_A9vXm17Qm5rMzlEa2VrUm8";
55 |     String lobsFileID = "0ByP7_A9vXm17Sml5dXdrSXozUjQ";
56 |
57 |     init("DB_MNIST", dataFilesDir, dataFileID, propsFileID, scriptFileID, lobsFileID, true);
58 |   }
59 |
60 |   /** Drops the {@code MNIST} table if it exists. */
61 |   public static int dropTable() {
62 |     return Yank.execute("DROP TABLE IF EXISTS MNIST", null);
63 |   }
64 |
65 |   /** Returns the train/test split value for this dataset, 60000. */
66 |   public static int getTrainTestSplit() {
67 |     return 60000;
68 |   }
69 |
70 |   /** Runs the statement registered under the key {@code KEY_CREATE_TABLE}. */
71 |   public static int createTable() {
72 |     return Yank.executeSQLKey("KEY_CREATE_TABLE", null);
73 |   }
74 |
75 |   /**
76 |    * Inserts one row built from the bean's id, label and image blob.
77 |    *
78 |    * @param mnist the record to store
79 |    * @return the value returned by {@code Yank.execute}
80 |    */
81 |   public static int insert(Mnist mnist) {
82 |
83 |     Object[] params = new Object[] {mnist.getId(), mnist.getLabel(), mnist.getImgbytes()};
84 |     String mnistInsert = "INSERT INTO MNIST (id, label, imgbytes) VALUES (?, ?, ?)";
85 |     return Yank.execute(mnistInsert, params);
86 |   }
87 |
88 |   /**
89 |    * Selects every row of {@code MNIST}.
90 |    *
91 |    * @return the rows as {@link Mnist} beans, typed so callers need no cast
92 |    */
93 |   public static List<Mnist> selectAll() {
94 |     return Yank.queryBeanList("SELECT * FROM MNIST", Mnist.class, null);
95 |   }
96 |
97 |   /** Returns the result of {@code Yank.queryBean} for the row whose {@code id} column equals {@code id}. */
98 |   public static Mnist selectSingle(int id) {
99 |     Object[] params = new Object[] {id};
100 |     return Yank.queryBean("SELECT * FROM MNIST WHERE id = ?", Mnist.class, params);
101 |   }
102 |
103 |   /** Returns the result of {@code SELECT COUNT(*)} on {@code MNIST}. */
104 |   public static long selectCount() {
105 |     String countSql = "SELECT COUNT(*) FROM MNIST";
106 |     return Yank.queryScalar(countSql, Long.class, null);
107 |   }
108 | }
109 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/java/org/knowm/datasets/mnist/RawData2DB.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.mnist;
36 |
37 | import java.io.IOException;
38 | import java.sql.SQLException;
39 | import java.util.Arrays;
40 |
41 | import javax.sql.rowset.serial.SerialBlob;
42 |
43 | import mnist.tools.MnistManager;
44 |
45 | /**
46 |  * Command-line tool that rebuilds the {@code MNIST} table from the raw image and label files under {@code ./raw}.
47 |  * @author timmolter
48 |  */
49 | public class RawData2DB {
50 |
51 |   /** Next row id; shared by both {@code go} calls so ids of the second file continue after the first. */
52 |   int idx = 0;
53 |
54 |   public static void main(String[] args) throws IOException, SQLException {
55 |
56 |     MnistDAO.init(new String[0]);
57 |     try {
58 |       MnistDAO.dropTable();
59 |       MnistDAO.createTable();
60 |
61 |       RawData2DB dp = new RawData2DB();
62 |       System.out.println("processing MNIST training images...");
63 |       dp.go("./raw/train-images-idx3-ubyte", "./raw/train-labels-idx1-ubyte", 1, 60000);
64 |       System.out.println("processing MNIST test images...");
65 |       dp.go("./raw/t10k-images-idx3-ubyte", "./raw/t10k-labels-idx1-ubyte", 1, 10000);
66 |       System.out.println("done.");
67 |     } finally {
68 |       MnistDAO.release(); // release data resources even when loading fails part-way
69 |     }
70 |   }
71 |
72 |   /** Reads images {@code startIdx..endIdx} (inclusive) with their labels and inserts one row per image. */
73 |   private void go(String imageDataFile, String labelDataFile, int startIdx, int endIdx) throws IOException, SQLException {
74 |
75 |     MnistManager mnistManager = new MnistManager(imageDataFile, labelDataFile);
76 |     for (int n = startIdx; n <= endIdx; n++) {
77 |       mnistManager.setCurrent(n); // index of the image that we are interested in
78 |       byte[] imageAsSignedByteArray = mnistManager.readImageAsSignedByteArray();
79 |
80 |       Mnist mnist = new Mnist();
81 |       mnist.setId(idx++);
82 |       mnist.setLabel(mnistManager.readLabel());
83 |       mnist.setImgbytes(new SerialBlob(imageAsSignedByteArray));
84 |       // Persist the row: main() drops and re-creates the table, so without this call it stays empty.
85 |       MnistDAO.insert(mnist);
86 |     }
87 |   }
88 | }
89 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/java/org/knowm/datasets/mnist/tools/MnistDigitViewer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
5 | * this software and associated documentation files (the "Software"), to deal in
6 | * the Software without restriction, including without limitation the rights to
7 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8 | * of the Software, and to permit persons to whom the Software is furnished to do
9 | * so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in all
12 | * copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | * SOFTWARE.
21 | */
22 | package org.knowm.datasets.mnist.tools;
23 |
24 | import java.awt.BorderLayout;
25 |
26 | import javax.swing.JFrame;
27 | import javax.swing.JPanel;
28 |
29 | /**
30 |  * Non-resizable frame that shows a single panel and uses {@code EXIT_ON_CLOSE} as its close operation.
31 |  *
32 |  * @author timmolter
33 |  */
34 | public class MnistDigitViewer extends JFrame {
35 |
36 |   /** Adds {@code jPanel} at the center, packs, then configures and shows the frame titled {@code title}. */
37 |   public MnistDigitViewer(JPanel jPanel, String title) {
38 |     this.add(jPanel, BorderLayout.CENTER);
39 |     this.pack();
40 |     this.setDefaultCloseOperation(EXIT_ON_CLOSE); // same constant, inherited through JFrame
41 |     this.setLocationRelativeTo(null); // null places the window relative to the screen rather than a component
42 |     this.setTitle(title);
43 |     this.setResizable(false);
44 |     this.setVisible(true);
45 |   }
46 | }
47 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/java/org/knowm/datasets/mnist/tools/MnistImagePanel.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
5 | * this software and associated documentation files (the "Software"), to deal in
6 | * the Software without restriction, including without limitation the rights to
7 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8 | * of the Software, and to permit persons to whom the Software is furnished to do
9 | * so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in all
12 | * copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | * SOFTWARE.
21 | */
22 | package org.knowm.datasets.mnist.tools;
23 |
24 | import java.awt.Dimension;
25 | import java.awt.Graphics;
26 | import java.awt.geom.AffineTransform;
27 | import java.awt.image.AffineTransformOp;
28 | import java.awt.image.BufferedImage;
29 |
30 | import javax.swing.JPanel;
31 |
32 | import org.knowm.datasets.mnist.Mnist;
33 |
34 | /**
35 |  * Panel that paints the image of one {@link Mnist} record at its top-left corner.
36 |  *
37 |  * @author alexnugent
38 |  */
39 | public class MnistImagePanel extends JPanel {
40 |
41 |   private BufferedImage bufferedImage;
42 |
43 |   public MnistImagePanel(Mnist mnistData) {
44 |
45 |     bufferedImage = mnistData.getImageAsBufferedImage();
46 |     setPreferredSize(new Dimension(bufferedImage.getWidth(), bufferedImage.getHeight()));
47 |   }
48 |
49 |   /**
50 |    * Replaces the image with a copy enlarged by {@code scale} and resizes the panel to match.
51 |    * The destination is sized {@code scale} times the source so the enlarged image is not clipped.
52 |    */
53 |   private void scale(int scale) {
54 |
55 |     int w = bufferedImage.getWidth() * scale;
56 |     int h = bufferedImage.getHeight() * scale;
57 |     AffineTransform at = AffineTransform.getScaleInstance(scale, scale);
58 |     AffineTransformOp scaleOp = new AffineTransformOp(at, AffineTransformOp.TYPE_BILINEAR);
59 |     this.bufferedImage = scaleOp.filter(bufferedImage, new BufferedImage(w, h, BufferedImage.TYPE_INT_ARGB));
60 |     setPreferredSize(new Dimension(w, h));
61 |   }
62 |
63 |   @Override
64 |   public void paintComponent(Graphics g) {
65 |     super.paintComponent(g);
66 |     g.drawImage(bufferedImage, 0, 0, null); // drawn at (0, 0) with no ImageObserver
67 |   }
68 | }
69 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_MNIST;shutdown=true;hsqldb.lob_file_scale=1
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-mnist/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE MNIST (id INTEGER NOT NULL, label INTEGER NOT NULL, imgbytes blob NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-mnist/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | # Columns match MnistDAO's INSERT (id, label, imgbytes); kept on one line so no continuation can fuse "PRIMARY" and "KEY".
2 | KEY_CREATE_TABLE=CREATE TABLE MNIST (id INTEGER NOT NULL, label INTEGER NOT NULL, imgbytes blob NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-mnist/src/main/resources/simplelogger.properties:
--------------------------------------------------------------------------------
1 | # SLF4J's SimpleLogger configuration file
2 | # Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err.
3 |
4 | # Default logging detail level for all instances of SimpleLogger.
5 | # Must be one of ("trace", "debug", "info", "warn", or "error").
6 | # If not specified, defaults to "info".
7 | org.slf4j.simpleLogger.defaultLogLevel=debug
8 |
9 | # Logging detail level for a SimpleLogger instance named "xxxxx".
10 | # Must be one of ("trace", "debug", "info", "warn", or "error").
11 | # If not specified, the default logging detail level is used.
12 | #org.slf4j.simpleLogger.log.xxxxx=
13 |
14 | # Set to true if you want the current date and time to be included in output messages.
15 | # Default is false, and will output the number of milliseconds elapsed since startup.
16 | #org.slf4j.simpleLogger.showDateTime=false
17 |
18 | # The date and time format to be used in the output messages.
19 | # The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat.
20 | # If the format is not specified or is invalid, the default format is used.
21 | # The default format is yyyy-MM-dd HH:mm:ss:SSS Z.
22 | #org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z
23 |
24 | # Set to true if you want to output the current thread name.
25 | # Defaults to true.
26 | #org.slf4j.simpleLogger.showThreadName=true
27 |
28 | # Set to true if you want the Logger instance name to be included in output messages.
29 | # Defaults to true.
30 | #org.slf4j.simpleLogger.showLogName=true
31 |
32 | # Set to true if you want the last component of the name to be included in output messages.
33 | # Defaults to false.
34 | #org.slf4j.simpleLogger.showShortLogName=true
--------------------------------------------------------------------------------
/datasets-mnist/src/test/java/org/knowm/datasets/mnist/unit/TestMnistDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.mnist.unit;
36 |
37 | import static org.hamcrest.CoreMatchers.equalTo;
38 | import static org.hamcrest.CoreMatchers.not;
39 | import static org.hamcrest.MatcherAssert.assertThat;
40 |
41 | import org.junit.AfterClass;
42 | import org.junit.BeforeClass;
43 | import org.junit.Ignore;
44 | import org.junit.Test;
45 | import org.knowm.datasets.mnist.Mnist;
46 | import org.knowm.datasets.mnist.MnistDAO;
47 |
48 | /**
49 |  * Checks {@link MnistDAO} against a populated database; skipped by default through {@code @Ignore}.
50 |  * @author timmolter
51 |  */
52 | @Ignore
53 | public class TestMnistDAO {
54 |
55 |   /** Initializes the DAO once before any test runs. */
56 |   @BeforeClass
57 |   public static void setUpDB() {
58 |     MnistDAO.init(new String[0]);
59 |   }
60 |
61 |   /** Releases the DAO after the last test. */
62 |   @AfterClass
63 |   public static void tearDownDB() {
64 |     MnistDAO.release();
65 |   }
66 |
67 |   /** The table is expected to hold 70000 rows. */
68 |   @Test
69 |   public void testSelectCount() {
70 |     final long expectedRows = 70000L;
71 |     assertThat(MnistDAO.selectCount(), equalTo(expectedRows));
72 |   }
73 |
74 |   /** Looking up id 2 must yield a bean. */
75 |   @Test
76 |   public void testSelectSingle() {
77 |     final Mnist found = MnistDAO.selectSingle(2);
78 |     assertThat(found, not(equalTo(null)));
79 |   }
80 | }
81 |
--------------------------------------------------------------------------------
/datasets-nsl-kdd/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets NSL-KDD
2 |
3 | [raw data](http://nsl.cs.unb.ca/NSL-KDD/)
4 |
5 | Note that the data is no longer available, but it can be accessed as described [here](https://stackoverflow.com/questions/31722843/unable-to-get-nsl-kdd-datasets).
6 |
7 | ## How to Get the Data
8 |
9 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
10 | 1. Manually copy the `DB_NSL_KDD.*` files into the `/usr/local/Datasets` folder. There should be four files.
11 |
12 | ## How to Generate Data
13 |
14 | 1. Download raw data from here: http://nsl.cs.unb.ca/NSL-KDD/
15 | 1. Put all files (`KDDTrain+.txt` and `KDDTest+.txt`) in a `raw` folder in the project root
16 | 1. Run `RawData2DB.java`.
17 | 1. The database containing the data will appear in `db`.
18 | 1. Manually copy the `DB_NSL_KDD.*` files into the `/usr/local/Datasets` folder. There should be three files.
19 |
20 | ## Build jar containing Data
21 |
22 | ## How to Use the Data in Your App
23 |
24 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
25 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
26 |
27 |
28 | try {
29 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
30 | BreastCancerDemo demo = new BreastCancerDemo();
31 | demo.go();
32 | } catch (Exception e) {
33 | e.printStackTrace();
34 | } finally {
35 | BreastCancerDAO.release(); // release data resources
36 | }
37 |
--------------------------------------------------------------------------------
/datasets-nsl-kdd/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-nsl-kdd
12 |
13 | Knowm Datasets for machine learning applications
14 | http://knowm.org/open-source/
15 | 2013
16 |
17 |
18 | Knowm Inc.
19 | http://knowm.org
20 |
21 |
22 |
23 |
24 | org.knowm.datasets
25 | datasets-common
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.hsqldb
30 | hsqldb
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/datasets-nsl-kdd/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_NSL_KDD;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-nsl-kdd/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE NSL_KDD (id INTEGER NOT NULL, duration FLOAT, protocol_type VARCHAR(16), service VARCHAR(16), flag VARCHAR(16), src_bytes FLOAT, dst_bytes FLOAT, land VARCHAR(16), wrong_fragment FLOAT, urgent FLOAT, hot FLOAT, num_failed_logins FLOAT, logged_in VARCHAR(16), num_compromised FLOAT, root_shell FLOAT, su_attempted FLOAT, num_root FLOAT, num_file_creations FLOAT, num_shells FLOAT, num_access_files FLOAT, num_outbound_cmds FLOAT, is_host_login VARCHAR(16), is_guest_login VARCHAR(16), concurrentcount FLOAT, srv_count FLOAT, serror_rate FLOAT, srv_serror_rate FLOAT, rerror_rate FLOAT, srv_rerror_rate FLOAT, same_srv_rate FLOAT, diff_srv_rate FLOAT, srv_diff_host_rate FLOAT, dst_host_count FLOAT, dst_host_srv_count FLOAT, dst_host_same_srv_rate FLOAT, dst_host_diff_srv_rate FLOAT, dst_host_same_src_port_rate FLOAT, dst_host_srv_diff_host_rate FLOAT, dst_host_serror_rate FLOAT, dst_host_srv_serror_rate FLOAT, dst_host_rerror_rate FLOAT, dst_host_srv_rerror_rate FLOAT, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-nsl-kdd/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE NSL_KDD (id INTEGER NOT NULL, duration FLOAT, protocol_type VARCHAR(16), service VARCHAR(16), flag VARCHAR(16), src_bytes FLOAT, dst_bytes FLOAT, land VARCHAR(16), wrong_fragment FLOAT, urgent FLOAT, hot FLOAT, num_failed_logins FLOAT, logged_in VARCHAR(16), num_compromised FLOAT, root_shell FLOAT, su_attempted FLOAT, num_root FLOAT, num_file_creations FLOAT, num_shells FLOAT, num_access_files FLOAT, num_outbound_cmds FLOAT, is_host_login VARCHAR(16), is_guest_login VARCHAR(16), concurrentcount FLOAT, srv_count FLOAT, serror_rate FLOAT, srv_serror_rate FLOAT, rerror_rate FLOAT, srv_rerror_rate FLOAT, same_srv_rate FLOAT, diff_srv_rate FLOAT, srv_diff_host_rate FLOAT, dst_host_count FLOAT, dst_host_srv_count FLOAT, dst_host_same_srv_rate FLOAT, dst_host_diff_srv_rate FLOAT, dst_host_same_src_port_rate FLOAT, dst_host_srv_diff_host_rate FLOAT, dst_host_serror_rate FLOAT, dst_host_srv_serror_rate FLOAT, dst_host_rerror_rate FLOAT, dst_host_srv_rerror_rate FLOAT, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-nsl-kdd/src/test/java/org/knowm/datasets/nslkdd/TestNSLKDDDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.nslkdd;
36 |
37 | import static org.hamcrest.CoreMatchers.equalTo;
38 | import static org.hamcrest.MatcherAssert.assertThat;
39 |
40 | import org.junit.AfterClass;
41 | import org.junit.BeforeClass;
42 | import org.junit.Ignore;
43 | import org.junit.Test;
44 | import org.knowm.datasets.nslkdd.NSLKDD;
45 | import org.knowm.datasets.nslkdd.NSLKDDDAO;
46 |
47 | /**
48 |  * Tests for {@link NSLKDDDAO}. The class carries {@code @Ignore}, so JUnit skips it unless the
49 |  * annotation is removed.
50 |  *
51 |  * @author timmolter
52 |  */
53 | @Ignore
54 | public class TestNSLKDDDAO {
55 |
56 |   /** Calls {@code NSLKDDDAO.init} with an empty argument array before any test runs. */
57 |   @BeforeClass
58 |   public static void setUpDB() {
59 |     NSLKDDDAO.init(new String[0]);
60 |   }
61 |
62 |   /** Calls {@code NSLKDDDAO.release} once all tests have finished. */
63 |   @AfterClass
64 |   public static void tearDownDB() {
65 |     NSLKDDDAO.release();
66 |   }
67 |
68 |   /** Expects {@code selectCount} to report 148517 rows. */
69 |   @Test
70 |   public void testSelectCount() {
71 |     final long rowCount = NSLKDDDAO.selectCount();
72 |     assertThat(rowCount, equalTo(148517L));
73 |   }
74 |
75 |   /** Prints the bean returned by {@code selectSingle(4)} and expects its src_bytes to be 199. */
76 |   @Test
77 |   public void testSelectSingle() {
78 |     final NSLKDD row = NSLKDDDAO.selectSingle(4);
79 |     System.out.println(row);
80 |     assertThat(row.getSrc_bytes(), equalTo(199f));
81 |   }
82 | }
83 |
--------------------------------------------------------------------------------
/datasets-numenta/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets Numenta
2 |
3 | [raw data](https://github.com/numenta/NAB)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_NUMENTA.*` files into the `/usr/local/Datasets` folder. There should be four files.
9 |
10 | ## How to populate the DB with NAB Data
11 |
12 | 1. Pull the "data" and "label" directories from https://github.com/numenta/NAB
13 | 1. Put both directories in the `raw` folder in the project root.
14 | 1. Run `RawData2DB.java`
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_NUMENTA.*` files into the `/usr/local/Datasets` folder. There should be four files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
--------------------------------------------------------------------------------
/datasets-numenta/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | 4.0.0
5 |
6 |
7 | org.knowm.datasets
8 | datasets-parent
9 | 2.2.0-SNAPSHOT
10 |
11 |
12 | datasets-numenta
13 |
14 | Knowm Datasets for machine learning applications
15 | http://knowm.org/open-source/
16 | 2013
17 |
18 |
19 | Knowm Inc.
20 | http://knowm.org
21 |
22 |
23 |
24 |
25 | org.knowm.datasets
26 | datasets-common
27 | 2.2.0-SNAPSHOT
28 |
29 |
30 | org.hsqldb
31 | hsqldb
32 |
33 |
34 | com.fasterxml.jackson.core
35 | jackson-core
36 |
37 |
38 | org.codehaus.jackson
39 | jackson-mapper-asl
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/datasets-numenta/src/main/java/org/knowm/datasets/numenta/SeriesPoint.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.numenta;
36 |
37 | import org.knowm.datasets.common.business.Bean;
38 |
39 | /**
40 |  * Bean for one point of a named series: series group, series name, timestamp, value and label.
41 |  */
42 | public class SeriesPoint extends Bean {
43 |
44 |   private String seriesGroup;
45 |   private String seriesName;
46 |   private long timestamp;
47 |   private double value;
48 |   private int label;
49 |
50 |   public String getSeriesGroup() {
51 |     return this.seriesGroup;
52 |   }
53 |
54 |   public void setSeriesGroup(String seriesGroup) {
55 |     this.seriesGroup = seriesGroup;
56 |   }
57 |
58 |   public String getSeriesName() {
59 |     return this.seriesName;
60 |   }
61 |
62 |   public void setSeriesName(String seriesName) {
63 |     this.seriesName = seriesName;
64 |   }
65 |
66 |   public long getTimestamp() {
67 |     return this.timestamp;
68 |   }
69 |
70 |   public void setTimestamp(long timestamp) {
71 |     this.timestamp = timestamp;
72 |   }
73 |
74 |   public double getValue() {
75 |     return this.value;
76 |   }
77 |
78 |   public void setValue(double value) {
79 |     this.value = value;
80 |   }
81 |
82 |   public int getLabel() {
83 |     return this.label;
84 |   }
85 |
86 |   public void setLabel(int label) {
87 |     this.label = label;
88 |   }
89 |
90 |   /** Renders the id followed by every field, separated by {@code " ,"}. */
91 |   @Override
92 |   public String toString() {
93 |     final String sep = " ,";
94 |     final StringBuilder text = new StringBuilder("SeriesPoint [id=").append(getId());
95 |     text.append(sep).append("seriesGroup=").append(seriesGroup);
96 |     text.append(sep).append("seriesName=").append(seriesName);
97 |     text.append(sep).append("timestamp=").append(timestamp);
98 |     text.append(sep).append("value=").append(value);
99 |     text.append(sep).append("label=").append(label);
100 |     return text.append("]").toString();
101 |   }
102 | }
103 |
--------------------------------------------------------------------------------
/datasets-numenta/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_NUMENTA;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-numenta/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE NUMENTA (id BIGINT NOT NULL, seriesGroup VARCHAR(500) NOT NULL, seriesName VARCHAR(500) NOT NULL, timestamp BIGINT NOT NULL, value DOUBLE NOT NULL, label TINYINT NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-numenta/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE NUMENTA (id BIGINT NOT NULL, seriesGroup VARCHAR(500) NOT NULL, seriesName VARCHAR(500) NOT NULL, timestamp BIGINT NOT NULL, value DOUBLE NOT NULL, label TINYINT NOT NULL, PRIMARY KEY (id))
--------------------------------------------------------------------------------
/datasets-numenta/src/test/java/org/knowm/datasets/numenta/TestNumentaDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam
5 | * LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software
8 | * and associated documentation files (the "Software"), to deal in the Software without restriction,
9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute,
10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
11 | * furnished to do so, subject to the following conditions:
12 | *
13 | * The above copyright notice and this permission notice shall be included in all copies or
14 | * substantial portions of the Software.
15 | *
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | */
22 | /**
23 | * This product currently only contains code developed by authors of specific components, as
24 | * identified by the source code files.
25 | *
26 | * Since product implements StAX API, it has dependencies to StAX API classes.
27 | *
28 | * For additional credits (generally to people who reported problems) see CREDITS file.
29 | */
30 | package org.knowm.datasets.numenta;
31 |
32 | import java.util.List;
33 |
34 | import org.junit.AfterClass;
35 | import org.junit.BeforeClass;
36 | import org.junit.Ignore;
37 | import org.junit.Test;
38 | import org.knowm.yank.PropertiesUtils;
39 | import org.knowm.yank.Yank;
40 |
41 | @Ignore
42 | public class TestNumentaDAO {
43 |
44 | @BeforeClass
45 | public static void setUpDB() {
46 |
47 | Yank.setupDefaultConnectionPool(
48 | PropertiesUtils.getPropertiesFromClasspath("DB_HSQLDB_FILE.properties"));
49 | }
50 |
51 | @AfterClass
52 | public static void tearDownDB() {
53 |
54 | NumentaDAO.release();
55 | }
56 |
57 | @Test
58 | public void testSelectCount() {
59 |
60 | List points = NumentaDAO.selectAll();
61 | assert (!points.isEmpty());
62 | }
63 |
64 | @Test
65 | public void testOrdering() {
66 |
67 | List names = NumentaDAO.selectSeriesNames();
68 | for (String name : names) {
69 | long last = Long.MIN_VALUE;
70 | List points = NumentaDAO.selectSeries(name);
71 | for (SeriesPoint p : points) {
72 | assert (p.getTimestamp() > last);
73 | }
74 | }
75 | }
76 |
77 | @Test
78 | public void testSeriesCount() {
79 |
80 | assert (NumentaDAO.selectSeries("TravelTime_387").size() == 2500);
81 | assert (NumentaDAO.selectSeries("TravelTime_451").size() == 2162);
82 | assert (NumentaDAO.selectSeries("Twitter_volume_AAPL").size() == 15902);
83 | assert (NumentaDAO.selectSeries("art_daily_jumpsup").size() == 4032);
84 | assert (NumentaDAO.selectSeries("ec2_cpu_utilization_825cc2").size() == 4032);
85 | assert (NumentaDAO.selectSeries("exchange_2_cpm_results").size() == 1624);
86 | assert (NumentaDAO.selectSeries("speed_6005").size() == 2500);
87 | }
88 |
89 | @Test
90 | public void testSeriesNames() {
91 |
92 | List names = NumentaDAO.selectSeriesNames();
93 | for (String name : names) {
94 | System.out.println(name);
95 | }
96 | assert (!names.isEmpty());
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/datasets-pcb/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets PCB
2 |
3 | [raw data](https://www.caa.tuwien.ac.at/cvl/research/cvl-databases/pcb-dslr-dataset/)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_PCB.*` files into the `/usr/local/Datasets` folder. There should be four files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download raw data from here: https://www.caa.tuwien.ac.at/cvl/research/cvl-databases/pcb-dslr-dataset/
13 | 1. Put all folders, after unzipping, in a `raw` folder in the project root.
14 | 1. Run `RawData2DB.java`.
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_PCB.*` files into the `/usr/local/Datasets` folder. There should be four files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
--------------------------------------------------------------------------------
/datasets-pcb/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-pcb
12 |
13 | Knowm Datasets for machine learning applications
14 | http://knowm.org/open-source/
15 | 2013
16 |
17 |
18 | Knowm Inc.
19 | http://knowm.org
20 |
21 |
22 |
23 |
24 | org.knowm.datasets
25 | datasets-common
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.hsqldb
30 | hsqldb
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCB.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.pcb;
36 |
37 | import java.sql.Blob;
38 |
39 | import org.knowm.datasets.common.business.Bean;
40 |
41 | /**
42 |  * Bean for one PCB entry; adds an image {@link Blob} to what it inherits from {@link Bean}.
43 |  *
44 |  * @author timmolter
45 |  */
46 | public class PCB extends Bean {
47 |
48 |   private Blob imgbytes;
49 |
50 |   /** @return the image blob currently stored on this bean */
51 |   public Blob getImgbytes() {
52 |     return this.imgbytes;
53 |   }
54 |
55 |   /** @param imgbytes the image blob to store on this bean */
56 |   public void setImgbytes(Blob imgbytes) {
57 |     this.imgbytes = imgbytes;
58 |   }
59 |
60 |   /** Renders only the id; the blob is left out. */
61 |   @Override
62 |   public String toString() {
63 |     return new StringBuilder("PCB [id=").append(getId()).append("]").toString();
64 |   }
65 | }
66 |
--------------------------------------------------------------------------------
/datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCBAnnotation.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.pcb;
36 |
37 | /**
38 |  * Plain data holder for one PCB annotation: the {@code pcbid} and {@code id} numbers, a position
39 |  * ({@code x}, {@code y}), a size ({@code width}, {@code height}), a rotation and a name.
40 |  *
41 |  * @author timmolter
42 |  */
43 | public class PCBAnnotation {
44 |
45 |   private int pcbid;
46 |   private int id;
47 |   private float x;
48 |   private float y;
49 |   private float width;
50 |   private float height;
51 |   private float rotation;
52 |   private String name;
53 |
54 |   public int getPcbid() {
55 |     return this.pcbid;
56 |   }
57 |
58 |   public void setPcbid(int pcbid) {
59 |     this.pcbid = pcbid;
60 |   }
61 |
62 |   public int getId() {
63 |     return this.id;
64 |   }
65 |
66 |   public void setId(int id) {
67 |     this.id = id;
68 |   }
69 |
70 |   public float getX() {
71 |     return this.x;
72 |   }
73 |
74 |   public void setX(float x) {
75 |     this.x = x;
76 |   }
77 |
78 |   public float getY() {
79 |     return this.y;
80 |   }
81 |
82 |   public void setY(float y) {
83 |     this.y = y;
84 |   }
85 |
86 |   public float getWidth() {
87 |     return this.width;
88 |   }
89 |
90 |   public void setWidth(float width) {
91 |     this.width = width;
92 |   }
93 |
94 |   public float getHeight() {
95 |     return this.height;
96 |   }
97 |
98 |   public void setHeight(float height) {
99 |     this.height = height;
100 |   }
101 |
102 |   public float getRotation() {
103 |     return this.rotation;
104 |   }
105 |
106 |   public void setRotation(float rotation) {
107 |     this.rotation = rotation;
108 |   }
109 |
110 |   public String getName() {
111 |     return this.name;
112 |   }
113 |
114 |   public void setName(String name) {
115 |     this.name = name;
116 |   }
117 |
118 |   /** Renders every field as {@code key=value}, comma-separated, inside square brackets. */
119 |   @Override
120 |   public String toString() {
121 |     final StringBuilder text = new StringBuilder("PCBAnnotation [");
122 |     text.append("pcbid=").append(pcbid);
123 |     text.append(", id=").append(id);
124 |     text.append(", x=").append(x);
125 |     text.append(", y=").append(y);
126 |     text.append(", width=").append(width);
127 |     text.append(", height=").append(height);
128 |     text.append(", rotation=").append(rotation);
129 |     text.append(", name=").append(name);
130 |     return text.append("]").toString();
131 |   }
132 | }
133 |
--------------------------------------------------------------------------------
/datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCBDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.pcb;
36 |
37 | import org.knowm.yank.Yank;
38 |
39 | /**
40 |  * Static data-access helpers for the {@code PCB} table, implemented with {@link Yank} calls.
41 |  *
42 |  * @author timmolter
43 |  */
44 | public class PCBDAO extends PCBParentDAO {
45 |
46 |   /**
47 |    * Executes {@code DROP TABLE IF EXISTS PCB}.
48 |    *
49 |    * @return whatever {@code Yank.execute} returns for the statement
50 |    */
51 |   public static int dropTable() {
52 |     final String dropSql = "DROP TABLE IF EXISTS PCB";
53 |     return Yank.execute(dropSql, null);
54 |   }
55 |
56 |   /**
57 |    * Executes the statement that Yank resolves for the key {@code KEY_CREATE_TABLE}.
58 |    *
59 |    * @return whatever {@code Yank.executeSQLKey} returns for the statement
60 |    */
61 |   public static int createTable() {
62 |     final String sqlKey = "KEY_CREATE_TABLE";
63 |     return Yank.executeSQLKey(sqlKey, null);
64 |   }
65 |
66 |   /**
67 |    * Inserts one row built from the bean's id and image bytes.
68 |    *
69 |    * @param pcb bean supplying {@code getId()} and {@code getImgbytes()}
70 |    * @return whatever {@code Yank.execute} returns for the insert
71 |    */
72 |   public static int insert(PCB pcb) {
73 |     final Object[] rowValues = {pcb.getId(), pcb.getImgbytes()};
74 |     return Yank.execute("INSERT INTO PCB (id, imgbytes) VALUES (?, ?)", rowValues);
75 |   }
76 |
77 |   /**
78 |    * Queries the row whose {@code id} column equals the given value.
79 |    *
80 |    * @param id value bound to the {@code id = ?} condition
81 |    * @return the {@link PCB} bean produced by {@code Yank.queryBean}
82 |    */
83 |   public static PCB selectSingle(int id) {
84 |     final Object[] bindValues = {id};
85 |     return Yank.queryBean("SELECT * FROM PCB WHERE id = ?", PCB.class, bindValues);
86 |   }
87 |
88 |   /**
89 |    * Counts the rows of the {@code PCB} table.
90 |    *
91 |    * @return the result of {@code SELECT COUNT(*) FROM PCB}, unboxed to {@code long}
92 |    */
93 |   public static long selectCount() {
94 |     final String countSql = "SELECT COUNT(*) FROM PCB";
95 |     return Yank.queryScalar(countSql, Long.class, null);
96 |   }
97 | }
98 |
--------------------------------------------------------------------------------
/datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCBParentDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.pcb;
36 |
37 | import org.knowm.datasets.common.business.DatasetsDAO;
38 |
39 | /**
40 |  * Parent DAO of the PCB dataset module. Its only job is to hand the PCB-specific database name
41 |  * and file IDs to the {@code init} method inherited from {@link DatasetsDAO}.
42 |  *
43 |  * @author timmolter
44 |  */
45 | public class PCBParentDAO extends DatasetsDAO {
46 |
47 |   /**
48 |    * Sets up the {@code DB_PCB} database by delegating to the inherited seven-argument
49 |    * {@code init}.
50 |    *
51 |    * @param dataFilesDir passed through unchanged as the data files directory
52 |    */
53 |   public static void init(String dataFilesDir) {
54 |
55 |     // NOTE(review): the four IDs are presumably the remote (Google Drive) IDs of the database
56 |     // files, and the meaning of the trailing flag is defined by DatasetsDAO.init - confirm.
57 |     init(
58 |         "DB_PCB",
59 |         dataFilesDir,
60 |         "0ByP7_A9vXm17SWZJa09fWnFxbGM", // data file ID
61 |         "0ByP7_A9vXm17RS1NMllKelJ0MlE", // properties file ID
62 |         "0ByP7_A9vXm17YXlZelRxV01ZdDg", // script file ID
63 |         "0ByP7_A9vXm17WDBoS25pMHZmR0E", // lobs file ID
64 |         true);
65 |   }
66 | }
67 |
--------------------------------------------------------------------------------
/datasets-pcb/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_PCB;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-pcb/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE PCB (id INTEGER NOT NULL, imgbytes blob NOT NULL, PRIMARY KEY (id))
2 | KEY_CREATE_TABLE_PCB_ANNOTATIONS=CREATE CACHED TABLE PCB_ANNOTATIONS (pcbid INTEGER NOT NULL, id INTEGER NOT NULL, x FLOAT NOT NULL, y FLOAT NOT NULL, width FLOAT NOT NULL, height FLOAT NOT NULL, rotation FLOAT NOT NULL, name VARCHAR(1000) NULL, CONSTRAINT PRI PRIMARY KEY ( pcbid, id ))
--------------------------------------------------------------------------------
/datasets-pcb/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE PCB (id INTEGER NOT NULL, imgbytes mediumblob NOT NULL, PRIMARY KEY (id))
2 | KEY_CREATE_TABLE_PCB_ANNOTATIONS=CREATE TABLE PCB_ANNOTATIONS (pcbid INTEGER NOT NULL, id INTEGER NOT NULL, x FLOAT NOT NULL, y FLOAT NOT NULL, width FLOAT NOT NULL, height FLOAT NOT NULL, rotation FLOAT NOT NULL, name VARCHAR(1000) NULL, PRIMARY KEY (`pcbid`, `id`))
--------------------------------------------------------------------------------
/datasets-pcb/src/test/java/org/knowm/datasets/pcb/TestPCBDAO.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.pcb;
36 |
37 | import static org.fest.assertions.api.Assertions.assertThat;
38 |
39 | import java.awt.image.BufferedImage;
40 | import java.io.InputStream;
41 | import java.util.List;
42 |
43 | import javax.imageio.ImageIO;
44 |
45 | import org.junit.AfterClass;
46 | import org.junit.BeforeClass;
47 | import org.junit.Ignore;
48 | import org.junit.Test;
49 |
50 | /**
51 |  * Tests for {@link PCBDAO} and {@link PCBAnnotationDAO}. The class is annotated with
52 |  * {@code @Ignore}, so JUnit skips it unless that annotation is removed.
53 |  *
54 |  * @author timmolter
55 |  */
56 | @Ignore
57 | public class TestPCBDAO {
58 |
59 |   /** Initializes both DAOs once before any test runs. */
60 |   @BeforeClass
61 |   public static void setUpDB() {
62 |
63 |     PCBDAO.init(new String[0]);
64 |     PCBAnnotationDAO.init(new String[0]);
65 |   }
66 |
67 |   /** Releases both DAOs once after all tests have run. */
68 |   @AfterClass
69 |   public static void tearDownDB() {
70 |
71 |     PCBDAO.release();
72 |     PCBAnnotationDAO.release();
73 |   }
74 |
75 |   /** The PCB table is expected to hold exactly 165 rows. */
76 |   @Test
77 |   public void testSelectCount() {
78 |
79 |     long count = PCBDAO.selectCount();
80 |     assertThat(count).isEqualTo(165L);
81 |   }
82 |
83 |   /**
84 |    * The image bytes of the PCB selected with argument 1 must decode into an image.
85 |    *
86 |    * @throws Exception if reading the blob or decoding the image fails; propagated so that JUnit
87 |    *     reports the failure instead of the test passing with only a printed stack trace
88 |    */
89 |   @Test
90 |   public void testImage() throws Exception {
91 |
92 |     PCB pcb = PCBDAO.selectSingle(1);
93 |     assertThat(pcb).isNotNull();
94 |
95 |     try (InputStream bytes = pcb.getImgbytes().getBinaryStream()) {
96 |
97 |       BufferedImage bufferedImage = ImageIO.read(bytes);
98 |       assertThat(bufferedImage).isNotNull();
99 |     }
100 |   }
101 |
102 |   /** Selecting annotations with argument 1 must yield exactly 23 entries; each one is printed. */
103 |   @Test
104 |   public void testAnnotations() {
105 |
106 |     long count = PCBAnnotationDAO.selectCount();
107 |     System.out.println("" + count);
108 |
109 |     // The element type is required: iterating a raw List as PCBAnnotation does not compile.
110 |     List<PCBAnnotation> pcbAnnotations = PCBAnnotationDAO.selectList(1);
111 |
112 |     assertThat(pcbAnnotations).hasSize(23);
113 |
114 |     for (PCBAnnotation pcbAnnotation : pcbAnnotations) {
115 |
116 |       System.out.println(pcbAnnotation.toString());
117 |     }
118 |   }
119 | }
120 |
--------------------------------------------------------------------------------
/datasets-reuters-21578/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets Reuters-21578
2 |
3 | [raw data](http://archive.ics.uci.edu/ml/support/Reuters-21578+Text+Categorization+Collection)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_REUTERS_21578.*` files into the `/usr/local/Datasets` folder. There should be three files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download raw data (`reuters21578.tar.gz`) from here: http://archive.ics.uci.edu/ml/machine-learning-databases/reuters21578-mld/
13 | 1. Unzip it and put all files in a `raw` folder in the project root.
14 | 1. Run `RawData2DB.java`.
15 | 1. The database containing the data will appear in `db`.
16 | 1. Manually copy the `DB_REUTERS_21578.*` files into the `/usr/local/Datasets` folder. There should be three files.
17 |
18 | ## How to Use the Data in Your App
19 |
20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
22 |
23 |
24 | try {
25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
26 | BreastCancerDemo demo = new BreastCancerDemo();
27 | demo.go();
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | } finally {
31 | BreastCancerDAO.release(); // release data resources
32 | }
33 |
--------------------------------------------------------------------------------
/datasets-reuters-21578/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | 4.0.0
5 |
6 |
7 | org.knowm.datasets
8 | datasets-parent
9 | 2.2.0-SNAPSHOT
10 |
11 |
12 | datasets-reuters-21578
13 |
14 | Knowm Datasets for machine learning applications
15 | http://knowm.org/open-source/
16 | 2013
17 |
18 |
19 | Knowm Inc.
20 | http://knowm.org
21 |
22 |
23 |
24 |
25 | org.knowm.datasets
26 | datasets-common
27 | 2.2.0-SNAPSHOT
28 |
29 |
30 | org.hsqldb
31 | hsqldb
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/datasets-reuters-21578/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_REUTERS_21578;shutdown=true
2 | username=sa
3 | password=
--------------------------------------------------------------------------------
/datasets-reuters-21578/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE REUTERS_21578 (NEWID INTEGER NOT NULL, OLDID INTEGER NOT NULL, TOPICSBOOL TINYINT NULL, LEWISSPLIT VARCHAR(256) NULL, CGISPLIT VARCHAR(256) NULL, DATE TIME NULL, TOPICS VARCHAR(256) NULL, PLACES VARCHAR(256) NULL, PEOPLE VARCHAR(256) NULL, ORGS VARCHAR(256) NULL, EXCHANGES VARCHAR(256) NULL, COMPANIES VARCHAR(256) NULL, TITLE VARCHAR(256) NULL, DATELINE VARCHAR(256) NULL, BODY VARCHAR(13500) NULL, PRIMARY KEY (NEWID))
--------------------------------------------------------------------------------
/datasets-reuters-21578/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE REUTERS_21578 (NEWID INTEGER NOT NULL, OLDID INTEGER NOT NULL, TOPICSBOOL TINYINT NULL, LEWISSPLIT VARCHAR(256) NULL, CGISPLIT VARCHAR(256) NULL, DATE TIME NULL, TOPICS VARCHAR(256) NULL, PLACES VARCHAR(256) NULL, PEOPLE VARCHAR(256) NULL, ORGS VARCHAR(256) NULL, EXCHANGES VARCHAR(256) NULL, COMPANIES VARCHAR(256) NULL, TITLE VARCHAR(256) NULL, DATELINE VARCHAR(256) NULL, BODY VARCHAR(13500) NULL, PRIMARY KEY (NEWID))
--------------------------------------------------------------------------------
/datasets-samples/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-samples
12 |
13 |
14 |
15 | org.knowm.datasets
16 | datasets-common
17 |
18 |
19 | org.hsqldb
20 | hsqldb
21 | compile
22 |
23 |
24 | org.knowm.datasets
25 | datasets-breast-cancer-wisconsin-original
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.knowm.datasets
30 | datasets-hja-birdsong
31 | 2.2.0-SNAPSHOT
32 |
33 |
34 | org.knowm.datasets
35 | datasets-census-income
36 | 2.2.0-SNAPSHOT
37 |
38 |
39 | org.knowm.datasets
40 | datasets-cifar10
41 | 2.2.0-SNAPSHOT
42 |
43 |
44 | org.knowm.datasets
45 | datasets-lshtc4
46 | 2.2.0-SNAPSHOT
47 |
48 |
49 | org.knowm.datasets
50 | datasets-mnist
51 | 2.2.0-SNAPSHOT
52 |
53 |
54 | org.knowm.datasets
55 | datasets-reuters-21578
56 | 2.2.0-SNAPSHOT
57 |
58 |
59 | org.knowm.datasets
60 | datasets-nsl-kdd
61 | 2.2.0-SNAPSHOT
62 |
63 |
64 | org.knowm.datasets
65 | datasets-ucsd-anomaly
66 | 2.2.0-SNAPSHOT
67 |
68 |
69 | org.knowm.datasets
70 | datasets-higgs-boson
71 | 2.2.0-SNAPSHOT
72 |
73 |
74 | org.knowm.datasets
75 | datasets-pcb
76 | 2.2.0-SNAPSHOT
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/BreastCancerDataInspector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import java.util.ArrayList;
38 | import java.util.List;
39 |
40 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancer;
41 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancerDAO;
42 | import org.knowm.datasets.common.data.HistogramDataInspector;
43 |
44 | /**
45 |  * Sorts the breast cancer samples into a malignant and a benign list and exposes both lists,
46 |  * together with their labels, through the {@link HistogramDataInspector} callbacks.
47 |  *
48 |  * @author timmolter
49 |  */
50 | public class BreastCancerDataInspector extends HistogramDataInspector {
51 |
52 |   private List malignant = new ArrayList();
53 |   private List benign = new ArrayList();
54 |
55 |   /**
56 |    * Initializes the DAO from {@code /usr/local/Datasets/}, fills the two sample lists, runs the
57 |    * inspector and always releases the DAO afterwards. Any exception is printed, not rethrown.
58 |    *
59 |    * @param args unused
60 |    */
61 |   public static void main(String[] args) {
62 |
63 |     try {
64 |       BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
65 |
66 |       BreastCancerDataInspector inspector = new BreastCancerDataInspector();
67 |       inspector.generateTrainAndTestArraysTruncated();
68 |       inspector.go();
69 |
70 |     } catch (Exception failure) {
71 |       failure.printStackTrace();
72 |     } finally {
73 |       BreastCancerDAO.release(); // release data resources
74 |     }
75 |   }
76 |
77 |   /**
78 |    * Reads the samples at indices 0 to count-1 and files each one under malignant when its cell
79 |    * class equals 4, otherwise under benign.
80 |    */
81 |   private void generateTrainAndTestArraysTruncated() {
82 |
83 |     final int sampleCount = (int) BreastCancerDAO.selectCount();
84 |     int index = 0;
85 |     while (index < sampleCount) {
86 |
87 |       BreastCancer sample = BreastCancerDAO.selectSingle(index);
88 |       List bucket = sample.getCellClass() == 4 ? malignant : benign;
89 |       bucket.add(sample);
90 |       index++;
91 |     }
92 |   }
93 |
94 |   /** @return the samples classified as malignant */
95 |   @Override
96 |   public List getFirstSamples() {
97 |
98 |     return malignant;
99 |   }
100 |
101 |   /** @return the samples classified as benign */
102 |   @Override
103 |   public List getSecondSamples() {
104 |
105 |     return benign;
106 |   }
107 |
108 |   /** @return the label of the first sample list */
109 |   @Override
110 |   public String getFirstLabel() {
111 |
112 |     return "Malignant";
113 |   }
114 |
115 |   /** @return the label of the second sample list */
116 |   @Override
117 |   public String getSecondLabel() {
118 |
119 |     return "Benign";
120 |   }
121 |
122 | }
123 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/BreastCancerDemo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancer;
38 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancerDAO;
39 |
40 | /**
41 |  * Prints the record count and then every record of the breast cancer dataset to standard output.
42 |  *
43 |  * @author timmolter
44 |  */
45 | public class BreastCancerDemo {
46 |
47 |   /**
48 |    * Initializes the DAO from {@code /usr/local/Datasets/}, runs the demo and always releases the
49 |    * DAO afterwards. Any exception is printed, not rethrown.
50 |    *
51 |    * @param args unused
52 |    */
53 |   public static void main(String[] args) {
54 |
55 |     try {
56 |       BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
57 |       BreastCancerDemo breastCancerDemo = new BreastCancerDemo();
58 |       breastCancerDemo.go();
59 |     } catch (Exception failure) {
60 |       failure.printStackTrace();
61 |     } finally {
62 |       BreastCancerDAO.release(); // release data resources
63 |     }
64 |   }
65 |
66 |   /** Prints the total count, then the records below the train/test split, then the rest. */
67 |   private void go() {
68 |
69 |     final long total = BreastCancerDAO.selectCount();
70 |     System.out.println("count= " + total);
71 |
72 |     // train objects: indices [0, split)
73 |     int index = 0;
74 |     while (index < BreastCancerDAO.getTrainTestSplit()) {
75 |       printRecord(index);
76 |       index++;
77 |     }
78 |
79 |     // test objects: indices [split, count)
80 |     index = BreastCancerDAO.getTrainTestSplit();
81 |     while (index < total) {
82 |       printRecord(index);
83 |       index++;
84 |     }
85 |   }
86 |
87 |   /** Fetches the record selected by {@code index} and prints its string form. */
88 |   private void printRecord(int index) {
89 |
90 |     BreastCancer breastCancer = BreastCancerDAO.selectSingle(index);
91 |     System.out.println(breastCancer.toString());
92 |   }
93 | }
94 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/CensusIncomeDemo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import org.knowm.datasets.censusincome.CensusIncome;
38 | import org.knowm.datasets.censusincome.CensusIncomeDAO;
39 |
40 | /**
41 |  * Prints the record count and then every record of the census income dataset to standard output.
42 |  *
43 |  * @author timmolter
44 |  */
45 | public class CensusIncomeDemo {
46 |
47 |   /**
48 |    * Initializes the DAO from {@code /usr/local/Datasets}, runs the demo and always releases the
49 |    * DAO afterwards. Any exception is printed, not rethrown.
50 |    *
51 |    * @param args unused
52 |    */
53 |   public static void main(String[] args) {
54 |
55 |     try {
56 |       CensusIncomeDAO.init("/usr/local/Datasets"); // setup data
57 |       CensusIncomeDemo censusIncomeDemo = new CensusIncomeDemo();
58 |       censusIncomeDemo.go();
59 |     } catch (Exception failure) {
60 |       failure.printStackTrace();
61 |     } finally {
62 |       CensusIncomeDAO.release(); // release data resources
63 |     }
64 |   }
65 |
66 |   /** Prints the total count, then the records below the train/test split, then the rest. */
67 |   private void go() {
68 |
69 |     final long total = CensusIncomeDAO.selectCount();
70 |     System.out.println("count= " + total);
71 |
72 |     // train objects: indices [0, split)
73 |     for (int row = 0; row < CensusIncomeDAO.getTrainTestSplit(); row++) {
74 |       CensusIncome trainRecord = CensusIncomeDAO.selectSingle(row);
75 |       String text = trainRecord.toString();
76 |       System.out.println(text);
77 |     }
78 |
79 |     // test objects: indices [split, count)
80 |     for (int row = CensusIncomeDAO.getTrainTestSplit(); row < total; row++) {
81 |       CensusIncome testRecord = CensusIncomeDAO.selectSingle(row);
82 |       String text = testRecord.toString();
83 |       System.out.println(text);
84 |     }
85 |   }
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/Cifar10Demo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import org.knowm.datasets.cifar10.Cifar;
38 | import org.knowm.datasets.cifar10.CifarDAO;
39 |
40 | /**
41 |  * Prints the record count and then every record of the CIFAR dataset to standard output.
42 |  *
43 |  * @author timmolter
44 |  */
45 | public class Cifar10Demo {
46 |
47 |   /**
48 |    * Initializes the DAO from {@code /usr/local/Datasets}, runs the demo and always releases the
49 |    * DAO afterwards. Any exception is printed, not rethrown.
50 |    *
51 |    * @param args unused
52 |    */
53 |   public static void main(String[] args) {
54 |
55 |     try {
56 |       CifarDAO.init("/usr/local/Datasets"); // setup data
57 |       Cifar10Demo cifar10Demo = new Cifar10Demo();
58 |       cifar10Demo.go();
59 |     } catch (Exception failure) {
60 |       failure.printStackTrace();
61 |     } finally {
62 |       CifarDAO.release(); // release data resources
63 |     }
64 |   }
65 |
66 |   /** Prints the total count, then the records below the train/test split, then the rest. */
67 |   private void go() {
68 |
69 |     final long total = CifarDAO.selectCount();
70 |     System.out.println("count= " + total);
71 |
72 |     // train objects: indices [0, split)
73 |     int position = 0;
74 |     while (position < CifarDAO.getTrainTestSplit()) {
75 |       show(position);
76 |       position++;
77 |     }
78 |
79 |     // test objects: indices [split, count)
80 |     position = CifarDAO.getTrainTestSplit();
81 |     while (position < total) {
82 |       show(position);
83 |       position++;
84 |     }
85 |   }
86 |
87 |   /** Fetches the record selected by {@code position} and prints its string form. */
88 |   private void show(int position) {
89 |
90 |     Cifar cifar = CifarDAO.selectSingle(position);
91 |     System.out.println(cifar.toString());
92 |   }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/CifarImageDisplayApp.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import javax.swing.JPanel;
38 |
39 | import org.knowm.datasets.cifar10.Cifar;
40 | import org.knowm.datasets.cifar10.CifarDAO;
41 | import org.knowm.datasets.cifar10.CifarImagePanel;
42 | import org.knowm.datasets.cifar10.CifarViewer;
43 |
44 | /**
45 |  * Opens a viewer window showing one CIFAR image together with its index and label.
46 |  *
47 |  * @author alexnugent
48 |  */
49 | public class CifarImageDisplayApp {
50 |
51 |   /** Image index used when no command-line argument is given. */
52 |   private static final int DEFAULT_IMAGE_INDEX = 836;
53 |
54 |   /**
55 |    * This app takes the following arguments:
56 |    *
57 |    * - int image index (836): index of the image to display
58 |    *
59 |    * Initialization happens inside the {@code try}, as in the other demos of this module, so the
60 |    * DAO is released even when setup fails. Failures are printed instead of being dropped.
61 |    *
62 |    * @param args optional; {@code args[0]} is the image index
63 |    */
64 |   public static void main(String[] args) {
65 |
66 |     try {
67 |       CifarDAO.init("/usr/local/Datasets"); // setup data
68 |       CifarImageDisplayApp cifarImageDisplayApp = new CifarImageDisplayApp();
69 |       cifarImageDisplayApp.go(args);
70 |     } catch (Exception e) {
71 |       // Report the problem (e.g. a non-numeric index) rather than exiting without a trace.
72 |       e.printStackTrace();
73 |     } finally {
74 |       CifarDAO.release(); // release data resources
75 |     }
76 |   }
77 |
78 |   /**
79 |    * Loads the requested image and shows it in a {@link CifarViewer}.
80 |    *
81 |    * @param args optional; {@code args[0]} is parsed as the image index, and the default index is
82 |    *     used when {@code args} is {@code null} or empty
83 |    * @throws NumberFormatException if {@code args[0]} is not a valid integer
84 |    * @throws IllegalArgumentException if no image is returned for the index
85 |    */
86 |   public void go(String[] args) {
87 |
88 |     // NOTE(review): the earlier documentation gave the valid range as [0-69,999]; no range check
89 |     // is performed here - confirm the real range against the dataset.
90 |     int imageIndex = DEFAULT_IMAGE_INDEX;
91 |     if (args != null && args.length > 0) {
92 |       imageIndex = Integer.parseInt(args[0]);
93 |     }
94 |
95 |     Cifar cifar = CifarDAO.selectSingle(imageIndex);
96 |     if (cifar == null) {
97 |       // Presumably selectSingle yields null for an unknown index - confirm against CifarDAO.
98 |       throw new IllegalArgumentException("No CIFAR image found for index " + imageIndex);
99 |     }
100 |
101 |     int[][][] rgbImage = cifar.getRGBImage();
102 |
103 |     // Build the image panel and hand it to the viewer.
104 |     // NOTE(review): the second constructor argument (4) is presumably a scale factor - confirm.
105 |     JPanel cifarImagePanel = new CifarImagePanel(rgbImage, 4);
106 |     String title = "Index = " + cifar.getId() + " label = " + cifar.getLabelWord();
107 |     new CifarViewer(cifarImagePanel, title);
108 |
109 |   }
110 | }
111 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/LSHTC4Demo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import org.knowm.datasets.lshtc4.LSHTC4;
38 | import org.knowm.datasets.lshtc4.LSHTC4DAO;
39 | import org.knowm.datasets.lshtc4.LSHTC4HierarchyDAO;
40 |
41 | /**
42 |  * Prints the record count, the hierarchy count and then every record of the LSHTC4 dataset to
43 |  * standard output.
44 |  *
45 |  * @author timmolter
46 |  */
47 | public class LSHTC4Demo {
48 |
49 |   /**
50 |    * Initializes the DAO from {@code /usr/local/Datasets}, runs the demo and always releases the
51 |    * DAO afterwards. Any exception is printed, not rethrown.
52 |    *
53 |    * @param args unused
54 |    */
55 |   public static void main(String[] args) {
56 |
57 |     try {
58 |       LSHTC4DAO.init("/usr/local/Datasets"); // setup data
59 |       LSHTC4Demo lshtc4Demo = new LSHTC4Demo();
60 |       lshtc4Demo.go();
61 |     } catch (Exception failure) {
62 |       failure.printStackTrace();
63 |     } finally {
64 |       LSHTC4DAO.release(); // release data resources
65 |     }
66 |   }
67 |
68 |   /** Prints both counts, then the records below the train/test split, then the rest. */
69 |   private void go() {
70 |
71 |     // number of LSHTC4 objects
72 |     final long total = LSHTC4DAO.selectCount();
73 |     System.out.println("count= " + total);
74 |
75 |     // number of LSHTC4Hierarchy objects
76 |     final long hierarchyTotal = LSHTC4HierarchyDAO.selectCount();
77 |     System.out.println("hierarchyCount= " + hierarchyTotal);
78 |
79 |     // indices [0, split): presumably the train objects, as in the sibling demos - confirm
80 |     int index = 0;
81 |     while (index < LSHTC4DAO.getTrainTestSplit()) {
82 |       printRecord(index);
83 |       index++;
84 |     }
85 |
86 |     // indices [split, count): presumably the test objects, as in the sibling demos - confirm
87 |     index = LSHTC4DAO.getTrainTestSplit();
88 |     while (index < total) {
89 |       printRecord(index);
90 |       index++;
91 |     }
92 |
93 |   }
94 |
95 |   /** Fetches the record selected by {@code index} and prints its string form. */
96 |   private void printRecord(int index) {
97 |
98 |     LSHTC4 lSHTC4 = LSHTC4DAO.selectSingle(index);
99 |     System.out.println(lSHTC4.toString());
100 |   }
101 |
102 | }
103 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/MNISTDemo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import org.knowm.datasets.mnist.Mnist;
38 | import org.knowm.datasets.mnist.MnistDAO;
39 |
40 | /**
41 | * @author timmolter
42 | */
43 | public class MNISTDemo {
44 |
45 | public static void main(String[] args) {
46 |
47 | try {
48 | MnistDAO.init("/usr/local/Datasets"); // setup data
49 | MNISTDemo demo = new MNISTDemo();
50 | demo.go();
51 | } catch (Exception e) {
52 | e.printStackTrace();
53 | } finally {
54 | MnistDAO.release(); // release data resources
55 | }
56 | }
57 |
58 | private void go() {
59 |
60 | // print number of objects
61 | long count = MnistDAO.selectCount();
62 | System.out.println("count= " + count);
63 |
64 | // loop through train objects
65 | for (int i = 0; i < MnistDAO.getTrainTestSplit(); i++) {
66 | Mnist mnist = MnistDAO.selectSingle(i);
67 | System.out.println(mnist.toString());
68 | }
69 |
70 | // loop through test objects
71 | for (int i = MnistDAO.getTrainTestSplit(); i < count; i++) {
72 | Mnist mnist = MnistDAO.selectSingle(i);
73 | System.out.println(mnist.toString());
74 | }
75 | }
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/MnistImageDisplayApp.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import javax.swing.JPanel;
38 |
39 | import org.knowm.datasets.mnist.Mnist;
40 | import org.knowm.datasets.mnist.MnistDAO;
41 | import org.knowm.datasets.mnist.tools.MnistDigitViewer;
42 | import org.knowm.datasets.mnist.tools.MnistImagePanel;
43 |
44 | /**
45 | * @author alexnugent
46 | */
47 | public class MnistImageDisplayApp {
48 |
49 | /**
50 | * This app takes the following arguments:
51 | *
52 | * - int image index (0): Image index [0-69,999]
53 | *
54 | * @param args
55 | */
56 | public static void main(String[] args) {
57 |
58 | try {
59 | MnistDAO.init("/usr/local/Datasets"); // setup data
60 | System.out.println("here");
61 | MnistImageDisplayApp mnistImageDisplayApp = new MnistImageDisplayApp();
62 | mnistImageDisplayApp.go(args);
63 | } catch (Exception e) {
64 | // eat it.
65 | } finally {
66 | MnistDAO.release(); // release data resources
67 | }
68 |
69 | }
70 |
71 | public void go(String[] args) {
72 |
73 | int imageIndex = 0;
74 |
75 | try {
76 | imageIndex = Integer.parseInt(args[0]);
77 |
78 | } catch (java.lang.ArrayIndexOutOfBoundsException e) {
79 | // just ignore
80 | }
81 |
82 | Mnist mnistData = MnistDAO.selectSingle(imageIndex);
83 | // paint the patches
84 | JPanel mnistImagePanel = new MnistImagePanel(mnistData);
85 | new MnistDigitViewer(mnistImagePanel, "Index = " + mnistData.getId() + " label = " + mnistData.getLabel());
86 |
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/NSLKDDDemo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import org.knowm.datasets.nslkdd.NSLKDD;
38 | import org.knowm.datasets.nslkdd.NSLKDDDAO;
39 |
40 | /**
41 | * @author timmolter
42 | */
43 | public class NSLKDDDemo {
44 |
45 | public static void main(String[] args) {
46 |
47 | try {
48 | NSLKDDDAO.init("/usr/local/Datasets"); // setup data
49 | NSLKDDDemo demo = new NSLKDDDemo();
50 | demo.go();
51 | } catch (Exception e) {
52 | e.printStackTrace();
53 | } finally {
54 | NSLKDDDAO.release(); // release data resources
55 | }
56 | }
57 |
58 | private void go() {
59 |
60 | // print number of objects
61 | long count = NSLKDDDAO.selectCount();
62 | System.out.println("count= " + count);
63 |
64 | // loop through train objects
65 | for (int i = 0; i < NSLKDDDAO.getTrainTestSplit(); i++) {
66 | NSLKDD nSLKDD = NSLKDDDAO.selectSingle(i);
67 | System.out.println(nSLKDD.toString());
68 | }
69 |
70 | // loop through test objects
71 | for (int i = NSLKDDDAO.getTrainTestSplit(); i < count; i++) {
72 | NSLKDD nSLKDD = NSLKDDDAO.selectSingle(i);
73 | System.out.println(nSLKDD.toString());
74 | }
75 | }
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/java/org/knowm/datasets/samples/Reuters21578Demo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | * this software and associated documentation files (the "Software"), to deal in
9 | * the Software without restriction, including without limitation the rights to
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
11 | * of the Software, and to permit persons to whom the Software is furnished to do
12 | * so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | */
25 | /**
26 | * This product currently only contains code developed by authors
27 | * of specific components, as identified by the source code files.
28 | *
29 | * Since product implements StAX API, it has dependencies to StAX API
30 | * classes.
31 | *
32 | * For additional credits (generally to people who reported problems)
33 | * see CREDITS file.
34 | */
35 | package org.knowm.datasets.samples;
36 |
37 | import java.util.List;
38 |
39 | import org.knowm.datasets.reuters21578.Reuters21578;
40 | import org.knowm.datasets.reuters21578.Reuters21578DAO;
41 |
42 | /**
43 | * @author timmolter
44 | */
45 | public class Reuters21578Demo {
46 |
47 | public static void main(String[] args) {
48 |
49 | try {
50 | Reuters21578DAO.init("/usr/local/Datasets"); // setup data
51 | Reuters21578Demo demo = new Reuters21578Demo();
52 | demo.go();
53 | } catch (Exception e) {
54 | e.printStackTrace();
55 | } finally {
56 | Reuters21578DAO.release(); // release data resources
57 | }
58 | }
59 |
60 | private void go() {
61 |
62 | // print number of objects
63 | long count = Reuters21578DAO.selectCount();
64 | System.out.println("count= " + count);
65 |
66 | // loop through train objects
67 | List trainSet = Reuters21578DAO.selectModApte("TRAIN", true);
68 | for (Reuters21578 reuters21578 : trainSet) {
69 | System.out.println(reuters21578.toString());
70 | }
71 |
72 | // loop through test objects
73 | List testSet = Reuters21578DAO.selectModApte("TEST", true);
74 | for (Reuters21578 reuters21578 : testSet) {
75 | System.out.println(reuters21578.toString());
76 | }
77 | }
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/datasets-samples/src/main/resources/simplelogger.properties:
--------------------------------------------------------------------------------
1 | # SLF4J's SimpleLogger configuration file
2 | # Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err.
3 |
4 | # Default logging detail level for all instances of SimpleLogger.
5 | # Must be one of ("trace", "debug", "info", "warn", or "error").
6 | # If not specified, defaults to "info".
7 | org.slf4j.simpleLogger.defaultLogLevel=debug
8 |
9 | # Logging detail level for a SimpleLogger instance named "xxxxx".
10 | # Must be one of ("trace", "debug", "info", "warn", or "error").
11 | # If not specified, the default logging detail level is used.
12 | #org.slf4j.simpleLogger.log.xxxxx=
13 |
14 | # Set to true if you want the current date and time to be included in output messages.
15 | # Default is false, and will output the number of milliseconds elapsed since startup.
16 | #org.slf4j.simpleLogger.showDateTime=false
17 |
18 | # The date and time format to be used in the output messages.
19 | # The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat.
20 | # If the format is not specified or is invalid, the default format is used.
21 | # The default format is yyyy-MM-dd HH:mm:ss:SSS Z.
22 | #org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z
23 |
24 | # Set to true if you want to output the current thread name.
25 | # Defaults to true.
26 | #org.slf4j.simpleLogger.showThreadName=true
27 |
28 | # Set to true if you want the Logger instance name to be included in output messages.
29 | # Defaults to true.
30 | #org.slf4j.simpleLogger.showLogName=true
31 |
32 | # Set to true if you want the last component of the name to be included in output messages.
33 | # Defaults to false.
34 | #org.slf4j.simpleLogger.showShortLogName=true
--------------------------------------------------------------------------------
/datasets-ucsd-anomaly/README.md:
--------------------------------------------------------------------------------
1 | ## Knowm Datasets UCSD Anomaly Detection Dataset
2 |
3 | [raw data](http://www.svcl.ucsd.edu/projects/anomaly/dataset.html)
4 |
5 | ## How to Get the Data
6 |
7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list).
8 | 1. Manually copy the `DB_UCSD_ANOMALY.*` files into the `/usr/local/Datasets` folder. There should be four files.
9 |
10 | ## How to Generate Data
11 |
12 | 1. Download raw data from here: http://www.svcl.ucsd.edu/projects/anomaly/dataset.html
13 | 1. Put all files in the `raw` folder in the project root
14 | 1. Convert tifs to pngs because Java won't work with these tifs for some reason
15 |
16 | find . -iname "*.tif" -type f -exec sh -c 'sips -s format png "$0" --out "${0%.tif}.png"' {} \;
17 |
18 | 1. Run `RawData2DB.java`.
19 | 1. The database containing the data will appear in `db`.
20 | 1. Manually copy the `DB_UCSD_ANOMALY.*` files into the `/usr/local/Datasets` folder. There should be four files.
21 |
22 | ## How to Use the Data in Your App
23 |
24 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes.
25 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
26 |
27 |
28 | try {
29 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data
30 | BreastCancerDemo demo = new BreastCancerDemo();
31 | demo.go();
32 | } catch (Exception e) {
33 | e.printStackTrace();
34 | } finally {
35 | BreastCancerDAO.release(); // release data resources
36 | }
37 |
--------------------------------------------------------------------------------
/datasets-ucsd-anomaly/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 |
6 | org.knowm.datasets
7 | datasets-parent
8 | 2.2.0-SNAPSHOT
9 |
10 |
11 | datasets-ucsd-anomaly
12 |
13 | Knowm Datasets for machine learning applications
14 | http://knowm.org/open-source/
15 | 2013
16 |
17 |
18 | Knowm Inc.
19 | http://knowm.org
20 |
21 |
22 |
23 |
24 | org.knowm.datasets
25 | datasets-common
26 | 2.2.0-SNAPSHOT
27 |
28 |
29 | org.hsqldb
30 | hsqldb
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/datasets-ucsd-anomaly/src/main/java/org/knowm/datasets/ucsdanomaly/UCSDAnomaly.java:
--------------------------------------------------------------------------------
1 | /**
2 | * (The MIT License)
3 | *
4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam
5 | * LLC (http://xeiam.com) and contributors.
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of this software
8 | * and associated documentation files (the "Software"), to deal in the Software without restriction,
9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute,
10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
11 | * furnished to do so, subject to the following conditions:
12 | *
13 | * The above copyright notice and this permission notice shall be included in all copies or
14 | * substantial portions of the Software.
15 | *
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | */
22 | /**
23 | * This product currently only contains code developed by authors of specific components, as
24 | * identified by the source code files.
25 | *
26 | * Since product implements StAX API, it has dependencies to StAX API classes.
27 | *
28 | * For additional credits (generally to people who reported problems) see CREDITS file.
29 | */
30 | package org.knowm.datasets.ucsdanomaly;
31 |
32 | import java.awt.image.BufferedImage;
33 | import java.io.InputStream;
34 | import java.sql.Blob;
35 |
36 | import javax.imageio.ImageIO;
37 |
38 | import org.knowm.datasets.common.business.Bean;
39 |
40 | /** @author timmolter */
41 | public class UCSDAnomaly extends Bean {
42 |
43 | private int tifid;
44 | private Blob tifbytes;
45 | private boolean isanomaly;
46 |
47 | public int getTifid() {
48 |
49 | return tifid;
50 | }
51 |
52 | public void setTifid(int tifid) {
53 |
54 | this.tifid = tifid;
55 | }
56 |
57 | public Blob getTifbytes() {
58 |
59 | return tifbytes;
60 | }
61 |
62 | public void setTifbytes(Blob tifbytes) {
63 |
64 | this.tifbytes = tifbytes;
65 | }
66 |
67 | public boolean isIsanomaly() {
68 |
69 | return isanomaly;
70 | }
71 |
72 | public void setIsanomaly(boolean isanomaly) {
73 |
74 | this.isanomaly = isanomaly;
75 | }
76 |
77 | public BufferedImage toBufferedImage() {
78 |
79 | InputStream bytes;
80 | try {
81 | bytes = getTifbytes().getBinaryStream();
82 | return ImageIO.read(bytes);
83 | } catch (Exception e) {
84 | // TODO Auto-generated catch block
85 | e.printStackTrace();
86 | }
87 | return null;
88 | }
89 |
90 | @Override
91 | public String toString() {
92 |
93 | return "UCSDAnomaly [id=" + getId() + ", tifid=" + tifid + ", isanomaly=" + isanomaly + "]";
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/datasets-ucsd-anomaly/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_UCSD_ANOMALY;shutdown=true
2 | username=sa
3 | password=
4 |
--------------------------------------------------------------------------------
/datasets-ucsd-anomaly/src/main/resources/SQL_HSQLDB.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE CACHED TABLE UCSD_ANONMALIES (id INTEGER NOT NULL, tifid INTEGER NOT NULL, tifbytes blob NOT NULL, isanomaly TINYINT NOT NULL, PRIMARY KEY (id, tifid))
--------------------------------------------------------------------------------
/datasets-ucsd-anomaly/src/main/resources/SQL_MYSQL.properties:
--------------------------------------------------------------------------------
1 | KEY_CREATE_TABLE=CREATE TABLE UCSD_ANONMALIES (id INTEGER NOT NULL, tifid INTEGER NOT NULL, tifbytes blob NOT NULL, isanomaly TINYINT NOT NULL, PRIMARY KEY (id, tifid))
--------------------------------------------------------------------------------
/etc/header.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7 | of the Software, and to permit persons to whom the Software is furnished to do
8 | so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
--------------------------------------------------------------------------------