├── .gitignore ├── .travis.yml ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── datasets-breast-cancer-wisconsin-orginal ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── breastcancerwisconsinorginal │ │ │ ├── BreastCancer.java │ │ │ ├── BreastCancerDAO.java │ │ │ └── RawData2DB.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── breastcancerwisconsinorginal │ └── TestBreastCancerDAO.java ├── datasets-census-income ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── censusincome │ │ │ ├── CensusIncome.java │ │ │ ├── CensusIncomeDAO.java │ │ │ └── RawData2DB.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── censusincome │ └── TestCensusIncomeDAO.java ├── datasets-cifar10 ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── cifar10 │ │ │ ├── Cifar.java │ │ │ ├── CifarDAO.java │ │ │ ├── CifarDbFile.java │ │ │ ├── CifarImageFile.java │ │ │ ├── CifarImagePanel.java │ │ │ ├── CifarManager.java │ │ │ ├── CifarRaw.java │ │ │ ├── CifarViewer.java │ │ │ └── RawData2DB.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── cifar10 │ └── TestCifarDAO.java ├── datasets-common ├── README.md ├── pom.xml ├── src │ ├── main │ │ └── java │ │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── common │ │ │ ├── Joiner.java │ │ │ ├── Splitter.java │ │ │ ├── business │ │ │ ├── Bean.java │ │ │ ├── BeanGenerator.java │ │ │ ├── BeanGenerator2.java │ │ │ └── DatasetsDAO.java │ │ │ └── data │ │ │ └── HistogramDataInspector.java │ └── 
test │ │ └── java │ │ └── org │ │ └── knowm │ │ └── datasets │ │ └── common │ │ └── JoinerSplitterTest.java └── test │ └── test.csv ├── datasets-higgs-boson ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── higgsboson │ │ │ ├── HiggsBoson.java │ │ │ ├── HiggsBosonDAO.java │ │ │ └── RawData2DB.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── higgsboson │ └── bootstrap │ └── HiggsBosonInspector.java ├── datasets-hja-birdsong ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ ├── com │ │ │ └── musicg │ │ │ │ ├── dsp │ │ │ │ └── WindowFunction.java │ │ │ │ └── wave │ │ │ │ ├── Spectrogram.java │ │ │ │ ├── SpectrogramRender.java │ │ │ │ ├── Wave.java │ │ │ │ ├── WaveHeader.java │ │ │ │ └── WaveformRender.java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ ├── dsp │ │ │ └── FastFourierTransform.java │ │ │ └── hjabirdsong │ │ │ ├── DownloadWavFiles.java │ │ │ ├── HJABirdSong.java │ │ │ ├── HJABirdsongDAO.java │ │ │ ├── HJABirdsongParentDAO.java │ │ │ ├── RawData2DB.java │ │ │ ├── RawData2DBTenFold.java │ │ │ ├── TenFold.java │ │ │ └── TenFoldDAO.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── hjabirdsong │ ├── TestHJABirdsongDAO.java │ └── TestTenFoldDAO.java ├── datasets-lshtc4 ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── lshtc4 │ │ │ ├── LSHTC4.java │ │ │ ├── LSHTC4DAO.java │ │ │ ├── LSHTC4Hierarchy.java │ │ │ ├── LSHTC4HierarchyDAO.java │ │ │ ├── LSHTC4ParentDAO.java │ │ │ ├── RawData2DB.java │ │ │ └── RawData2DBHierarchy.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── 
datasets │ └── lshtc4 │ ├── TestLSHTC4DAO.java │ └── TestLSHTC4HierarchyDAO.java ├── datasets-mnist ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ ├── mnist │ │ │ └── tools │ │ │ │ ├── MnistDbFile.java │ │ │ │ ├── MnistImageFile.java │ │ │ │ ├── MnistLabelFile.java │ │ │ │ └── MnistManager.java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── mnist │ │ │ ├── Mnist.java │ │ │ ├── MnistDAO.java │ │ │ ├── RawData2DB.java │ │ │ └── tools │ │ │ ├── MnistDigitViewer.java │ │ │ └── MnistImagePanel.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ ├── SQL_MYSQL.properties │ │ └── simplelogger.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── mnist │ ├── bootstrap │ └── MnistFeatureFactory.java │ └── unit │ └── TestMnistDAO.java ├── datasets-nsl-kdd ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── nslkdd │ │ │ ├── NSLKDD.java │ │ │ ├── NSLKDDDAO.java │ │ │ └── RawData2DB.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── nslkdd │ └── TestNSLKDDDAO.java ├── datasets-numenta ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── numenta │ │ │ ├── NumentaDAO.java │ │ │ ├── RawData2DB.java │ │ │ └── SeriesPoint.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── numenta │ └── TestNumentaDAO.java ├── datasets-pcb ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── pcb │ │ │ ├── PCB.java │ │ │ ├── PCBAnnotation.java │ │ │ ├── PCBAnnotationDAO.java │ │ │ ├── PCBDAO.java │ │ │ ├── PCBParentDAO.java │ │ │ └── RawData2DB.java │ └── resources │ │ ├── 
DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── pcb │ └── TestPCBDAO.java ├── datasets-reuters-21578 ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── reuters21578 │ │ │ ├── RawData2DB.java │ │ │ ├── Reuters21578.java │ │ │ └── Reuters21578DAO.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── reuters21578 │ └── TestReuters21578DAO.java ├── datasets-samples ├── pom.xml └── src │ └── main │ ├── java │ └── org │ │ └── knowm │ │ └── datasets │ │ └── samples │ │ ├── BreastCancerDataInspector.java │ │ ├── BreastCancerDemo.java │ │ ├── CensusIncomeDemo.java │ │ ├── Cifar10Demo.java │ │ ├── CifarImageDisplayApp.java │ │ ├── HJABirdsongSpectrogramViewer.java │ │ ├── HiggsBosonDemo.java │ │ ├── LSHTC4Demo.java │ │ ├── MNISTDemo.java │ │ ├── MnistImageDisplayApp.java │ │ ├── NSLKDDDemo.java │ │ ├── PCBAnnotatedImageViewer.java │ │ ├── Reuters21578Demo.java │ │ └── UCSDAnomalyVideoFrameViewer.java │ └── resources │ └── simplelogger.properties ├── datasets-ucsd-anomaly ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── knowm │ │ │ └── datasets │ │ │ └── ucsdanomaly │ │ │ ├── RawData2DB.java │ │ │ ├── UCSDAnomaly.java │ │ │ └── UCSDAnomalyDAO.java │ └── resources │ │ ├── DB_HSQLDB_FILE.properties │ │ ├── SQL_HSQLDB.properties │ │ └── SQL_MYSQL.properties │ └── test │ └── java │ └── org │ └── knowm │ └── datasets │ └── ucsdanomaly │ └── TestUCSDAnomalyDAO.java ├── etc └── header.txt └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | javadoc/ 3 | bin/ 4 | .classpath 5 | .project 6 | .settings/ 7 | .idea/ 8 | *.iml 9 | *.iws 10 | .DS_Store 11 | raw/ 12 | db/ 13 | apacheOpenNLP/ 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: false 3 | 4 | before_install: "git clone -b travis `git config --get remote.origin.url` target/travis" 5 | script: " 6 | if [ ${TRAVIS_PULL_REQUEST} = 'false' ]; 7 | then 8 | mvn -Dmaven.test.skip=true clean deploy --settings target/travis/settings.xml; 9 | else 10 | mvn clean verify --settings target/travis/settings.xml; 11 | fi" 12 | 13 | jdk: 14 | - oraclejdk8 15 | 16 | # whitelist 17 | branches: 18 | only: 19 | - develop 20 | 21 | env: 22 | global: 23 | - secure: "ayAHHNCTmITUTf231Rk24EtsBf1ZFn9Epl443JvcMtFQi5yxu2G7Maiu8fXGxxcYYDGRL8zTupher3v9YSU2JBF04vr1BJGjwlHSfpPsBheMh+X2QiZdpJN3d3RgGG3GfUDrxkf+xn6w8rBdBOblBgCcEl2m7a0PmgdCBa8/37E=" 24 | - secure: "yt1gwgQbWLGBtAEfFoNelfW/z934kJ43ETZ62BDZt29RNvhP9jj5p6rOfz2szeQ/oZy6gqUHfPhbWaBK3ct2d5RoIpkbZseHP9lG0wwDmeSekZIoAFQcSL1dFNvyfMJftZtrSO6p9Vv/DpE9/3Wff8X0BpweF9r8LHgOU4kper8=" 25 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Datasets is developed by Knowm Inc. members and the open-source community. 2 | 3 | We thank all of our contributors: https://github.com/timmolter/datasets/graphs/contributors 4 | 5 | For the detailed history of contributions of a given file, try 6 | 7 | git blame file 8 | 9 | To see line-by-line credits and to see the change log even across renames and rewrites, try 10 | 11 | git log --follow file 12 | 13 | Copyright is held by the original contributor according to the versioning history; see LICENSE. 
14 | 15 | The following list of authors was automatically generated from the Datasets project's git repo with the command: 16 | 17 | git log --format='%aN' | sort -u 18 | 19 | Tim Molter 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 4 | Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | of the Software, and to permit persons to whom the Software is furnished to do 11 | so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
-------------------------------------------------------------------------------- /datasets-breast-cancer-wisconsin-orginal/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets Breast Cancer Wisconsin Original 2 | 3 | [raw data](http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_BREAST_CANCER.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. Download raw data from here: http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29 13 | 1. Put the `breast-cancer-wisconsin.data` file in a `raw` folder in the project root. 14 | 1. Run `RawData2DB.java`. 15 | 1. The database containing the data will appear in `db`. 16 | 1. Manually copy the `DB_BREAST_CANCER.*` files into the `/usr/local/Datasets` folder. There should be 3 files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 
22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | -------------------------------------------------------------------------------- /datasets-breast-cancer-wisconsin-orginal/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | org.knowm.datasets 8 | datasets-parent 9 | 2.2.0-SNAPSHOT 10 | 11 | 12 | datasets-breast-cancer-wisconsin-original 13 | 14 | Knowm Datasets for machine learning applications 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | org.knowm.datasets 26 | datasets-common 27 | 2.2.0-SNAPSHOT 28 | 29 | 30 | org.hsqldb 31 | hsqldb 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /datasets-breast-cancer-wisconsin-orginal/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_BREAST_CANCER;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-breast-cancer-wisconsin-orginal/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE BREAST_CANCER (id INTEGER NOT NULL, SAMPLECODENUMBER INTEGER NOT NULL, CLUMPTHICKNESS INTEGER NOT NULL, UNIFORMITYOFCELLSIZE INTEGER NOT NULL, UNIFORMITYOFCELLSHAPE INTEGER NOT NULL, MARGINALADHESION INTEGER NOT NULL, SINGLEEPITHELIALCELLSIZE INTEGER NOT NULL, BARENUCLEI INTEGER NOT NULL, BLANDCHROMATIN INTEGER NOT NULL, NORMALNUCLEOLI INTEGER NOT NULL, MITOSES INTEGER NOT NULL, CELLCLASS INTEGER NOT 
NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-breast-cancer-wisconsin-orginal/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE BREAST_CANCER (id INTEGER NOT NULL, SAMPLECODENUMBER INTEGER NOT NULL, CLUMPTHICKNESS INTEGER NOT NULL, UNIFORMITYOFCELLSIZE INTEGER NOT NULL, UNIFORMITYOFCELLSHAPE INTEGER NOT NULL, MARGINALADHESION INTEGER NOT NULL, SINGLEEPITHELIALCELLSIZE INTEGER NOT NULL, BARENUCLEI INTEGER NOT NULL, BLANDCHROMATIN INTEGER NOT NULL, NORMALNUCLEOLI INTEGER NOT NULL, MITOSES INTEGER NOT NULL, CELLCLASS INTEGER NOT NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-breast-cancer-wisconsin-orginal/src/test/java/org/knowm/datasets/breastcancerwisconsinorginal/TestBreastCancerDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.breastcancerwisconsinorginal; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.MatcherAssert.assertThat; 39 | 40 | import org.junit.AfterClass; 41 | import org.junit.BeforeClass; 42 | import org.junit.Ignore; 43 | import org.junit.Test; 44 |
/**
 * Checks the row count reported by {@link BreastCancerDAO}.
 *
 * <p>The class is annotated with {@code @Ignore}, so JUnit skips it by default.
 * NOTE(review): presumably because it needs the generated database files to be present; confirm.
 *
 * @author timmolter
 */
@Ignore
public class TestBreastCancerDAO {

  /** Initializes the DAO once before any test runs, passing an empty argument array. */
  @BeforeClass
  public static void setUpDB() {
    final String[] noArgs = new String[0];
    BreastCancerDAO.init(noArgs);
  }

  /** Releases the DAO's resources once all tests have finished. */
  @AfterClass
  public static void tearDownDB() {
    BreastCancerDAO.release();
  }

  /** The table is expected to hold exactly 683 rows. */
  @Test
  public void testSelectCount() {
    final long expectedRows = 683L;
    assertThat(BreastCancerDAO.selectCount(), equalTo(expectedRows));
  }
}
72 | -------------------------------------------------------------------------------- /datasets-census-income/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets Census Income 2 | 3 | [raw data](http://archive.ics.uci.edu/ml/datasets/Census+Income) 4 | 
5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_CENSUS_INCOME.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. Download raw data from here: http://archive.ics.uci.edu/ml/datasets/Census+Income 13 | 1. Put the `adult.data` and `adult.test` files in a `raw` folder in the project root. 14 | 1. Run `RawData2DB.java`. 15 | 1. The database containing the data will appear in `db`. 16 | 1. Manually copy the `DB_CENSUS_INCOME.*` files into the `/usr/local/Datasets` folder. There should be 3 files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | -------------------------------------------------------------------------------- /datasets-census-income/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | org.knowm.datasets 8 | datasets-parent 9 | 2.2.0-SNAPSHOT 10 | 11 | 12 | datasets-census-income 13 | 14 | Knowm Datasets for machine learning applications 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 
20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | org.knowm.datasets 26 | datasets-common 27 | 2.2.0-SNAPSHOT 28 | 29 | 30 | org.hsqldb 31 | hsqldb 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /datasets-census-income/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_CENSUS_INCOME;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-census-income/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE CENSUS_INCOME (id INTEGER NOT NULL, age INTEGER NOT NULL, workclass VARCHAR(256) NOT NULL, fnlwgt INTEGER NOT NULL, education VARCHAR(256) NOT NULL, educationNum INTEGER NOT NULL, maritalStatus VARCHAR(256) NOT NULL, occupation VARCHAR(256) NOT NULL, relationship VARCHAR(256) NOT NULL, race VARCHAR(256) NOT NULL, sex VARCHAR(256) NOT NULL, capitalGain INTEGER NOT NULL, capitalLoss INTEGER NOT NULL, hoursPerWeek INTEGER NOT NULL, nativeCountry VARCHAR(256) NOT NULL, incomeLessThan50k TINYINT NOT NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-census-income/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE CENSUS_INCOME (id INTEGER NOT NULL, age INTEGER NOT NULL, workclass VARCHAR(256) NOT NULL, fnlwgt INTEGER NOT NULL, education VARCHAR(256) NOT NULL, educationNum INTEGER NOT NULL, maritalStatus VARCHAR(256) NOT NULL, occupation VARCHAR(256) NOT NULL, relationship VARCHAR(256) NOT NULL, race VARCHAR(256) NOT NULL, sex VARCHAR(256) NOT NULL, capitalGain INTEGER NOT NULL, capitalLoss INTEGER NOT NULL, hoursPerWeek INTEGER NOT NULL, 
nativeCountry VARCHAR(256) NOT NULL, incomeLessThan50k TINYINT NOT NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-census-income/src/test/java/org/knowm/datasets/censusincome/TestCensusIncomeDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | *

Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam 5 | * LLC (http://xeiam.com) and contributors. 6 | * 7 | *

Permission is hereby granted, free of charge, to any person obtaining a copy of this software 8 | * and associated documentation files (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute, 10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | *

The above copyright notice and this permission notice shall be included in all copies or 14 | * substantial portions of the Software. 15 | * 16 | *

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | /** 23 | * This product currently only contains code developed by authors of specific components, as 24 | * identified by the source code files. 25 | * 26 | *

Since product implements StAX API, it has dependencies to StAX API classes. 27 | * 28 | *

For additional credits (generally to people who reported problems) see CREDITS file. 29 | */ 30 | package org.knowm.datasets.censusincome; 31 | 32 | import static org.hamcrest.CoreMatchers.equalTo; 33 | import static org.hamcrest.MatcherAssert.assertThat; 34 | 35 | import org.junit.AfterClass; 36 | import org.junit.BeforeClass; 37 | import org.junit.Ignore; 38 | import org.junit.Test; 39 | import org.knowm.yank.PropertiesUtils; 40 | import org.knowm.yank.Yank; 41 |
/**
 * Checks the row count reported by {@link CensusIncomeDAO}.
 *
 * <p>The class is annotated with {@code @Ignore}, so JUnit skips it by default.
 * NOTE(review): presumably because it needs the generated database files to be present; confirm.
 *
 * @author timmolter
 */
@Ignore
public class TestCensusIncomeDAO {

  /**
   * Sets up Yank's default connection pool from the {@code DB_HSQLDB_FILE.properties} classpath
   * resource before any test runs.
   */
  @BeforeClass
  public static void setUpDB() {
    Yank.setupDefaultConnectionPool(
        PropertiesUtils.getPropertiesFromClasspath("DB_HSQLDB_FILE.properties"));
  }

  /** Releases the DAO's resources once all tests have finished. */
  @AfterClass
  public static void tearDownDB() {
    CensusIncomeDAO.release();
  }

  /** The table is expected to hold exactly 48842 rows. */
  @Test
  public void testSelectCount() {
    final long expectedRows = 48842L;
    assertThat(CensusIncomeDAO.selectCount(), equalTo(expectedRows));
  }
}
66 | -------------------------------------------------------------------------------- /datasets-cifar10/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets CIFAR-10 2 | 3 | [raw data](http://www.cs.toronto.edu/~kriz/cifar.html) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_CIFAR.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. Download raw data from here: http://www.cs.toronto.edu/~kriz/cifar.html 13 | 1. put all files in `raw` folder in project root 14 | 1. Run `RawData2DB.java`. 15 | 1. The database containing the data will appear in `db`. 16 | 1. Manually copy the `DB_CIFAR.*` files into the `/usr/local/Datasets` folder. 
There should be three files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | -------------------------------------------------------------------------------- /datasets-cifar10/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-cifar10 12 | 13 | Knowm Datasets for machine learning applications 14 | http://knowm.org/open-source/ 15 | 2013 16 | 17 | 18 | Knowm Inc. 19 | http://knowm.org 20 | 21 | 22 | 23 | 24 | org.knowm.datasets 25 | datasets-common 26 | 2.2.0-SNAPSHOT 27 | 28 | 29 | org.hsqldb 30 | hsqldb 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.cifar10; 36 | 37 | import org.knowm.datasets.common.business.DatasetsDAO; 38 | import org.knowm.yank.Yank; 39 | 40 | /** 41 | * @author timmolter 42 | */ 43 | public class CifarDAO extends DatasetsDAO { 44 | 45 | public static void init(String dataFilesDir) { 46 | 47 | String dataFileID = "0ByP7_A9vXm17VERJam9EMm5sTkU"; 48 | String propsFileID = "0ByP7_A9vXm17VHIzd1hSNW4zUXc"; 49 | String scriptFileID = "0ByP7_A9vXm17eHlzcDJfalNoYkk"; 50 | 51 | init("DB_CIFAR", dataFilesDir, dataFileID, propsFileID, scriptFileID, null, true); 52 | } 53 | 54 | public static int dropTable() { 55 | 56 | return Yank.execute("DROP TABLE IF EXISTS CIFAR", null); 57 | } 58 | 59 | public static int getTrainTestSplit() { 60 | 61 | return 50000; 62 | } 63 | 64 | public static int createTable() { 65 | 66 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null); 67 | } 68 | 69 | public static int insert(Cifar CIFAR) { 70 | 71 | Object[] params = new Object[] { 72 | 73 | // @formatter:off 74 | CIFAR.getId(), CIFAR.getLabel(), CIFAR.getImagedata() 75 | // @formatter:on 76 | }; 77 | String CIFAR_INSERT = "INSERT INTO CIFAR (id, label, imagedata) VALUES (?, ?, ?)"; 78 | return Yank.execute(CIFAR_INSERT, params); 79 | 80 | } 81 | 82 | public static Cifar selectSingle(int id) { 83 | 84 | Object[] params = new Object[] { id }; 85 | 86 | String SELECT_SINGLE = "SELECT * FROM CIFAR WHERE id = ?"; 87 | 88 | return Yank.queryBean(SELECT_SINGLE, Cifar.class, params); 89 | } 90 | 91 | public static long selectCount() { 92 | 93 | String SELECT_COUNT = "SELECT COUNT(*) FROM CIFAR"; 94 | 95 | return Yank.queryScalar(SELECT_COUNT, Long.class, null); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarManager.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm 
Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.cifar10; 36 | 37 | import java.io.IOException; 38 | 39 | public class CifarManager { 40 | 41 | private CifarImageFile images; 42 | 43 | /** 44 | * Constructs an instance managing the two given data files. Supports NULL value for one of the arguments in case reading only one of 45 | * the files (images and labels) is required. 
46 | * 47 | * @param imagesFile Can be NULL. In that case all future operations using that file will fail. 48 | * @throws IOException 49 | */ 50 | public CifarManager(String imagesFile) throws IOException { 51 | 52 | if (imagesFile != null) { 53 | images = new CifarImageFile(imagesFile, "r"); 54 | } 55 | } 56 | 57 | /** 58 | * Reads the current image. 59 | * 60 | * @return matrix 61 | * @throws IOException 62 | */ 63 | public CifarRaw readImage() throws IOException { 64 | 65 | if (images == null) { 66 | throw new IllegalStateException("Images file not initialized."); 67 | } 68 | return images.readImage(); 69 | } 70 | 71 | /** 72 | * Set the position to be read. 73 | * 74 | * @param index 75 | */ 76 | public void setCurrent(int index) { 77 | 78 | images.setCurrentIndex(index); 79 | } 80 | 81 | /** 82 | * Get the underlying images file as {@link CifarImageFile}. 83 | * 84 | * @return {@link CifarImageFile}. 85 | */ 86 | public CifarImageFile getImages() { 87 | 88 | return images; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /datasets-cifar10/src/main/java/org/knowm/datasets/cifar10/CifarRaw.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
/**
 * Holder for one raw CIFAR record: a class label plus the three colour planes.
 *
 * <p>The channel arrays are stored and returned by reference; no copies are made.
 *
 * @author timmolter
 */
public class CifarRaw {

  private final int label;
  private final int[][] redChannel, greenChannel, blueChannel;

  /**
   * Creates a record from a label and its three colour planes.
   *
   * @param label class label of the image
   * @param redChannel red plane
   * @param greenChannel green plane
   * @param blueChannel blue plane
   */
  public CifarRaw(int label, int[][] redChannel, int[][] greenChannel, int[][] blueChannel) {

    this.redChannel = redChannel;
    this.greenChannel = greenChannel;
    this.blueChannel = blueChannel;
    this.label = label;
  }

  /** @return the class label */
  public int getLabel() {

    return this.label;
  }

  /** @return the red plane (same array instance that was passed in) */
  public int[][] getRedChannel() {

    return this.redChannel;
  }

  /** @return the green plane (same array instance that was passed in) */
  public int[][] getGreenChannel() {

    return this.greenChannel;
  }

  /** @return the blue plane (same array instance that was passed in) */
  public int[][] getBlueChannel() {

    return this.blueChannel;
  }

}
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
/**
 * Minimal fixed-size window that displays a single panel and exits the JVM when closed.
 *
 * @author timmolter
 */
public class CifarViewer extends JFrame {

  /**
   * Builds the frame around the given panel and shows it immediately.
   *
   * @param jPanel the panel placed in the centre of the frame
   * @param title the window title
   */
  public CifarViewer(JPanel jPanel, String title) {

    // Settings that do not influence layout are applied up front.
    this.setTitle(title);
    this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

    // Lay out the content and size the frame to fit it.
    this.add(jPanel, BorderLayout.CENTER);
    this.pack();

    // Centre on screen (needs the packed size), lock the size, then show.
    this.setLocationRelativeTo(null);
    this.setResizable(false);
    this.setVisible(true);
  }

}
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.cifar10; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.MatcherAssert.assertThat; 39 | 40 | import org.junit.AfterClass; 41 | import org.junit.BeforeClass; 42 | import org.junit.Ignore; 43 | import org.junit.Test; 44 | import org.knowm.datasets.cifar10.CifarDAO; 45 | 46 | /** 47 | * @author timmolter 48 | */ 49 | @Ignore 50 | public class TestCifarDAO { 51 | 52 | @BeforeClass 53 | public static void setUpDB() { 54 | 55 | CifarDAO.init(new String[0]); 56 | } 57 | 58 | @AfterClass 59 | public static void tearDownDB() { 60 | 61 | CifarDAO.release(); 62 | } 63 | 64 | @Test 65 | public void testSelectCount() { 66 | 67 | long count = CifarDAO.selectCount(); 68 | assertThat(count, equalTo(60000L)); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /datasets-common/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets Common Code 2 | 3 | -------------------------------------------------------------------------------- /datasets-common/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | org.knowm.datasets 8 | datasets-parent 9 | 2.2.0-SNAPSHOT 10 | 11 | 12 | datasets-common 13 | 14 | Knowm Datasets - Common Code 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | commons-io 26 | commons-io 27 | 28 | 29 | org.knowm 30 | yank 31 | 32 | 33 | commons-beanutils 34 | commons-beanutils 35 | 36 | 37 | org.knowm.xchart 38 | xchart 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /datasets-common/src/main/java/org/knowm/datasets/common/Joiner.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. 
(http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
/**
 * Utility for joining list elements into a single delimited String.
 *
 * @author timmolter
 */
public class Joiner {

  /**
   * Joins the elements of a list into one String, placing {@code separator} between each pair of
   * adjacent elements. Elements are converted with {@link String#valueOf(Object)}.
   *
   * @param separator the text inserted between elements
   * @param topicsArray the elements to join, in order
   * @return the joined String; empty if the list is empty
   * @throws NullPointerException if {@code topicsArray} is {@code null}
   */
  public static String join(String separator, List<?> topicsArray) {

    StringBuilder sb = new StringBuilder();
    // Track position explicitly: checking sb.length() would drop the separator after
    // leading empty elements.
    boolean first = true;
    for (Object topic : topicsArray) {
      if (!first) {
        // Honour the caller's separator (this used to be a hard-coded ",").
        sb.append(separator);
      }
      sb.append(topic);
      first = false;
    }
    return sb.toString();
  }

}
/**
 * Utility for splitting a delimited String into its parts.
 *
 * @author timmolter
 */
public class Splitter {

  /**
   * Splits a String around every occurrence of a literal separator.
   *
   * <p>The separator is matched literally, so characters such as {@code .}, {@code |} or
   * {@code *} need no escaping. As with {@link String#split(String)}, trailing empty parts are
   * not included in the result.
   *
   * @param separator the literal text that delimits the parts
   * @param stringToSplit the text to split
   * @return the parts, in order of appearance
   * @throws NullPointerException if either argument is {@code null}
   */
  public static Iterable<String> split(String separator, String stringToSplit) {

    // String.split interprets its argument as a regular expression; quote it so the
    // separator is treated as plain text.
    String[] parts = stringToSplit.split(java.util.regex.Pattern.quote(separator));

    return Arrays.asList(parts);
  }

}
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.common.business; 36 | 37 | import java.io.Serializable; 38 | 39 | /** 40 | * Created by timmolter on 5/18/17. 
/**
 * Serializable base bean that carries nothing but an integer id.
 */
public class Bean implements Serializable {

  /** Identifier of this bean; 0 until {@link #setId(int)} is called. */
  private int id;

  /** @return the current id */
  public int getId() {

    return this.id;
  }

  /**
   * Replaces the id.
   *
   * @param id the new id
   */
  public void setId(int id) {

    this.id = id;
  }

}
28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.common; 36 | 37 | import static org.fest.assertions.api.Assertions.assertThat; 38 | 39 | import java.util.ArrayList; 40 | import java.util.Arrays; 41 | import java.util.Iterator; 42 | import java.util.List; 43 | 44 | import org.junit.Test; 45 | 46 | /** 47 | * @author timmolter 48 | */ 49 | public class JoinerSplitterTest { 50 | 51 | @Test 52 | public void test1() { 53 | 54 | String[] stringArray = new String[] { "foo", "bar", "blah" }; 55 | List stringList = new ArrayList(Arrays.asList(stringArray)); 56 | String csv = Joiner.join(",", stringList); 57 | assertThat(csv).isEqualTo("foo,bar,blah"); 58 | 59 | Iterable iterable = Splitter.split(",", csv); 60 | Iterator itr = iterable.iterator(); 61 | assertThat(itr.next()).isEqualTo("foo"); 62 | assertThat(itr.next()).isEqualTo("bar"); 63 | assertThat(itr.next()).isEqualTo("blah"); 64 | 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /datasets-common/test/test.csv: -------------------------------------------------------------------------------- 1 | name, age, height 2 | joe, 23, 180.5 -------------------------------------------------------------------------------- /datasets-higgs-boson/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-higgs-boson 12 | higgs-boson 13 | 14 | Knowm Datasets for machine learning applications 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 
20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | org.knowm.datasets 26 | datasets-common 27 | 2.2.0-SNAPSHOT 28 | 29 | 30 | org.hsqldb 31 | hsqldb 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /datasets-higgs-boson/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_HIGGS_BOSON;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-higgs-boson/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE HIGGS_BOSON (EventId INTEGER NOT NULL, DER_mass_MMC FLOAT NULL, DER_mass_transverse_met_lep FLOAT NULL, DER_mass_vis FLOAT NULL, DER_pt_h FLOAT NULL, DER_deltaeta_jet_jet FLOAT NULL, DER_mass_jet_jet FLOAT NULL, DER_prodeta_jet_jet FLOAT NULL, DER_deltar_tau_lep FLOAT NULL, DER_pt_tot FLOAT NULL, DER_sum_pt FLOAT NULL, DER_pt_ratio_lep_tau FLOAT NULL, DER_met_phi_centrality FLOAT NULL, DER_lep_eta_centrality FLOAT NULL, PRI_tau_pt FLOAT NULL, PRI_tau_eta FLOAT NULL, PRI_tau_phi FLOAT NULL, PRI_lep_pt FLOAT NULL, PRI_lep_eta FLOAT NULL, PRI_lep_phi FLOAT NULL, PRI_met FLOAT NULL, PRI_met_phi FLOAT NULL, PRI_met_sumet FLOAT NULL, PRI_jet_num FLOAT NULL, PRI_jet_leading_pt FLOAT NULL, PRI_jet_leading_eta FLOAT NULL, PRI_jet_leading_phi FLOAT NULL, PRI_jet_subleading_pt FLOAT NULL, PRI_jet_subleading_eta FLOAT NULL, PRI_jet_subleading_phi FLOAT NULL, PRI_jet_all_pt FLOAT NULL, Weight FLOAT NULL, Label VARCHAR(1) NULL, PRIMARY KEY (EventId)) -------------------------------------------------------------------------------- /datasets-higgs-boson/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE HIGGS_BOSON (EventId INTEGER
NOT NULL, DER_mass_MMC FLOAT NULL, DER_mass_transverse_met_lep FLOAT NULL, DER_mass_vis FLOAT NULL, DER_pt_h FLOAT NULL, DER_deltaeta_jet_jet FLOAT NULL, DER_mass_jet_jet FLOAT NULL, DER_prodeta_jet_jet FLOAT NULL, DER_deltar_tau_lep FLOAT NULL, DER_pt_tot FLOAT NULL, DER_sum_pt FLOAT NULL, DER_pt_ratio_lep_tau FLOAT NULL, DER_met_phi_centrality FLOAT NULL, DER_lep_eta_centrality FLOAT NULL, PRI_tau_pt FLOAT NULL, PRI_tau_eta FLOAT NULL, PRI_tau_phi FLOAT NULL, PRI_lep_pt FLOAT NULL, PRI_lep_eta FLOAT NULL, PRI_lep_phi FLOAT NULL, PRI_met FLOAT NULL, PRI_met_phi FLOAT NULL, PRI_met_sumet FLOAT NULL, PRI_jet_num FLOAT NULL, PRI_jet_leading_pt FLOAT NULL, PRI_jet_leading_eta FLOAT NULL, PRI_jet_leading_phi FLOAT NULL, PRI_jet_subleading_pt FLOAT NULL, PRI_jet_subleading_eta FLOAT NULL, PRI_jet_subleading_phi FLOAT NULL, PRI_jet_all_pt FLOAT NULL, Weight FLOAT NULL, Label VARCHAR(1) NULL, PRIMARY KEY (EventId)) -------------------------------------------------------------------------------- /datasets-hja-birdsong/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets HJA Birdsong 2 | 3 | [raw data](http://web.engr.oregonstate.edu/~briggsf/kdd2012datasets/hja_birdsong/) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_HJA_BIRDSONG.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. Download the raw data from the "raw data" link at the top of this README. 13 | 1. Put all files (hierarchy.zip, test-remapped and train-remapped) in the `raw` folder in the project root. 14 | 1. Run `DownloadWavFiles.java` and `RawData2DB.java`. 15 | 1. The database containing the data will appear in `db`. 16 | 1.
Manually copy the `DB_HJA_BIRDSONG.*` files into some folder, which you will point to later in an app using the data such as `/usr/local/Datasets/`. There should be four files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | 34 | ## Birdsong Spectrogram Viewer 35 | 36 | See: `org.knowm.datasets.samples.HJABirdsongSpectrogramViewer`. Make sure you've placed the generated/downloaded DB data at the location specified at the line: `HJABirdsongDAO.init("/usr/local/Datasets/"); // setup data`. 37 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-hja-birdsong 12 | 13 | Knowm Datasets for machine learning applications 14 | http://knowm.org/open-source/ 15 | 2013 16 | 17 | 18 | Knowm Inc.
/**
 * Window functions generator
 *
 * @author Jacquet Wong
 */
public class WindowFunction {

  /** The supported window shapes. */
  public enum WindowType {
    RECTANGULAR, BARTLETT, HANNING, HAMMING, BLACKMAN
  }

  /**
   * Generate a window.
   *
   * <p>All shapes are evaluated around the centre index {@code m = nSamples / 2} for every index
   * {@code 0 .. nSamples - 1}. For {@code nSamples <= 1} there is no meaningful shape, so the
   * result is filled with {@code 1.0}.
   *
   * @param windowType shape of the window
   * @param nSamples size of the window
   * @return window in array
   * @throws NullPointerException if {@code windowType} is {@code null}
   * @throws NegativeArraySizeException if {@code nSamples} is negative
   */
  public double[] generate(WindowType windowType, int nSamples) {

    if (windowType == null) {
      throw new NullPointerException("windowType");
    }

    int m = nSamples / 2;
    double[] w = new double[nSamples];

    if (m == 0) {
      // 0 or 1 samples: avoid dividing by m == 0 below.
      java.util.Arrays.fill(w, 1.0);
      return w;
    }

    double r;
    switch (windowType) {
      case BARTLETT: // Bartlett (triangular) window
        for (int n = 0; n < nSamples; n++) {
          // Floating-point division: integer division would collapse the ramp to 0/1.
          w[n] = 1.0 - Math.abs(n - m) / (double) m;
        }
        break;
      case HANNING: // Hanning window
        r = Math.PI / (m + 1);
        // Iterating over every index also fills the last sample when nSamples is odd.
        for (int n = 0; n < nSamples; n++) {
          w[n] = 0.5 + 0.5 * Math.cos((n - m) * r);
        }
        break;
      case HAMMING: // Hamming window
        r = Math.PI / m;
        for (int n = 0; n < nSamples; n++) {
          w[n] = 0.54 + 0.46 * Math.cos((n - m) * r);
        }
        break;
      case BLACKMAN: // Blackman window
        r = Math.PI / m;
        for (int n = 0; n < nSamples; n++) {
          w[n] = 0.42 + 0.5 * Math.cos((n - m) * r) + 0.08 * Math.cos(2 * (n - m) * r);
        }
        break;
      default: // Rectangular window function
        java.util.Arrays.fill(w, 1.0);
    }
    return w;
  }
}
16 | */ 17 | package com.musicg.wave; 18 | 19 | import java.awt.image.BufferedImage; 20 | import java.io.File; 21 | import java.io.IOException; 22 | 23 | import javax.imageio.ImageIO; 24 | 25 | public class SpectrogramRender { 26 | 27 | /** 28 | * Render a spectrogram of a wave file 29 | * 30 | * @param spectrogram spectrogram object 31 | */ 32 | public BufferedImage renderSpectrogram(Spectrogram spectrogram) { 33 | 34 | double[][] spectrogramData = spectrogram.getNormalizedSpectrogramData(); 35 | 36 | int width = spectrogramData.length; 37 | int height = spectrogramData[0].length; 38 | 39 | BufferedImage bufferedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); 40 | for (int i = 0; i < width; i++) { 41 | for (int j = 0; j < height; j++) { 42 | int value; 43 | value = (int) (spectrogramData[i][j] * 255); 44 | bufferedImage.setRGB(i, j, value << 16 | value << 8 | value); 45 | } 46 | } 47 | 48 | return bufferedImage; 49 | 50 | } 51 | 52 | public void saveSpectrogram(BufferedImage bufferedImage, String filename) throws IOException { 53 | 54 | int dotPos = filename.lastIndexOf("."); 55 | String extension = filename.substring(dotPos + 1); 56 | ImageIO.write(bufferedImage, extension, new File(filename)); 57 | } 58 | 59 | /** 60 | * Render a spectrogram of a wave file 61 | * 62 | * @param spectrogram spectrogram object 63 | * @param filename output file 64 | * @throws IOException 65 | * @see RGB graphic rendered 66 | */ 67 | public void saveSpectrogram(Spectrogram spectrogram, String filename) throws IOException { 68 | 69 | BufferedImage bufferedImage = renderSpectrogram(spectrogram); 70 | saveSpectrogram(bufferedImage, filename); 71 | } 72 | } -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/dsp/FastFourierTransform.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 
Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.dsp; 36 | 37 | import org.apache.commons.math3.complex.Complex; 38 | import org.apache.commons.math3.transform.DftNormalization; 39 | import org.apache.commons.math3.transform.FastFourierTransformer; 40 | import org.apache.commons.math3.transform.TransformType; 41 | 42 | /** 43 | * FFT object, transform amplitudes to frequency intensities 44 | */ 45 | public class FastFourierTransform { 46 | 47 | /** 48 | * Get the frequency intensities 49 | * 50 | * @param amplitudes amplitudes of the signal 51 | */ 52 | public double[] getMagnitudes(double[] amplitudes) { 53 | 54 | FastFourierTransformer fft = new FastFourierTransformer(DftNormalization.STANDARD); 55 | 56 | Complex[] result = fft.transform(amplitudes, TransformType.FORWARD); 57 | 58 | // convert from complex to magnitude and only keep the positive frequencies as the FTT creates a symmetric result 59 | int positiveSize = result.length / 2; 60 | double[] magnitude = new double[positiveSize]; 61 | for (int i = positiveSize; i < result.length; i++) { 62 | magnitude[i - positiveSize] = result[i].abs(); 63 | } 64 | 65 | return magnitude; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/DownloadWavFiles.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import java.io.File; 38 | import java.io.IOException; 39 | import java.net.URL; 40 | import java.util.List; 41 | 42 | import org.apache.commons.io.FileUtils; 43 | 44 | /** 45 | * @author timmolter 46 | */ 47 | public class DownloadWavFiles { 48 | 49 | public static void main(String[] args) throws IOException { 50 | 51 | DownloadWavFiles downloadWavFiles = new DownloadWavFiles(); 52 | downloadWavFiles.go(); 53 | } 54 | 55 | private void go() throws IOException { 56 | 57 | String baseURL = "http://web.engr.oregonstate.edu/~briggsf/kdd2012datasets/hja_birdsong/src_wavs/"; 58 | 59 | List wavNameLines = FileUtils.readLines(new File("./raw/id2filename.txt"), "UTF-8"); 60 | 61 | for (int i = 0; i < wavNameLines.size(); i++) { 62 | 63 | String wavNameLine = wavNameLines.get(i); 64 | String wavFileName = wavNameLine.substring(wavNameLine.indexOf(",") + 1, wavNameLine.length()) + ".wav"; 65 | System.out.println("downloading: " + wavFileName); 66 | URL url = new URL(baseURL + wavFileName); 67 | File wavDir = new File("./raw/wav/" + wavFileName); 68 | org.apache.commons.io.FileUtils.copyURLToFile(url, wavDir, 5000, 10000); 69 | } 70 | 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/HJABirdsongDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import org.knowm.yank.Yank; 38 | 39 | /** 40 | * @author timmolter 41 | */ 42 | public class HJABirdsongDAO extends HJABirdsongParentDAO { 43 | 44 | public static int dropTable() { 45 | 46 | return Yank.execute("DROP TABLE IF EXISTS BIRD_SONGS", null); 47 | } 48 | 49 | public static int createTable() { 50 | 51 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null); 52 | } 53 | 54 | public static int insert(HJABirdSong hJABirdSong) { 55 | 56 | Object[] params = new Object[] { 57 | 58 | // @formatter:off 59 | hJABirdSong.getBagid(), hJABirdSong.getLabels(), hJABirdSong.getWavfilename(), hJABirdSong.getWavbytes() 60 | // @formatter:on 61 | }; 62 | String BIRD_SONGS_INSERT = "INSERT INTO BIRD_SONGS (bagid, labels, wavfilename, wavbytes) VALUES (?, ?, ?, ?)"; 63 | return Yank.execute(BIRD_SONGS_INSERT, params); 64 | } 65 | 66 | public static HJABirdSong selectSingle(int bagid) { 67 | 68 | Object[] params = new Object[] { bagid }; 69 | 70 | String SELECT_SINGLE = "SELECT * FROM BIRD_SONGS WHERE bagid = ?"; 71 | 72 | return Yank.queryBean(SELECT_SINGLE, HJABirdSong.class, params); 73 | } 74 | 75 | public static long selectCount() { 76 | 77 | String SELECT_COUNT = "SELECT COUNT(*) FROM BIRD_SONGS"; 78 | 79 | return Yank.queryScalar(SELECT_COUNT, Long.class, null); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/HJABirdsongParentDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import org.knowm.datasets.common.business.DatasetsDAO; 38 | 39 | /** 40 | * @author timmolter 41 | */ 42 | public class HJABirdsongParentDAO extends DatasetsDAO { 43 | 44 | public static void init(String dataFilesDir) { 45 | 46 | String dataFileID = "0ByP7_A9vXm17SWZJa09fWnFxbGM"; 47 | String propsFileID = "0ByP7_A9vXm17RS1NMllKelJ0MlE"; 48 | String scriptFileID = "0ByP7_A9vXm17YXlZelRxV01ZdDg"; 49 | String lobsFileID = "0ByP7_A9vXm17WDBoS25pMHZmR0E"; 50 | 51 | init("DB_HJA_BIRDSONG", dataFilesDir, dataFileID, propsFileID, scriptFileID, lobsFileID, true); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/RawData2DBTenFold.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import java.io.File; 38 | import java.io.IOException; 39 | import java.util.Iterator; 40 | import java.util.List; 41 | 42 | import org.apache.commons.io.FileUtils; 43 | import org.knowm.datasets.common.Splitter; 44 | 45 | /** 46 | * @author timmolter 47 | */ 48 | public class RawData2DBTenFold { 49 | 50 | int idx = 0; 51 | 52 | public static void main(String[] args) throws IOException { 53 | 54 | TenFoldDAO.init(args); 55 | 56 | TenFoldDAO.dropTable(); 57 | TenFoldDAO.createTable(); 58 | 59 | RawData2DBTenFold dp = new RawData2DBTenFold(); 60 | System.out.println("processing TenFold data..."); 61 | dp.go("./raw/hja_birdsong_10_fold.txt"); 62 | 63 | System.out.println("done."); 64 | 65 | TenFoldDAO.release(); 66 | } 67 | 68 | private void go(String dataFile) throws IOException { 69 | 70 | List lines = FileUtils.readLines(new File(dataFile), "UTF-8"); 71 | 72 | for (String line : lines) { 73 | 74 | System.out.println(line); 75 | Iterable splitLine = Splitter.split(",", line); 76 | Iterator itr = splitLine.iterator(); 77 | String bagid = itr.next(); 78 | String fold = itr.next(); 79 | try { 80 | TenFold tenFold = new TenFold(); 81 | tenFold.setBagid(Integer.parseInt(bagid)); 82 | tenFold.setFold(Integer.parseInt(fold)); 83 | TenFoldDAO.insert(tenFold); 84 | 
System.out.println(tenFold.toString()); 85 | idx++; 86 | } catch (Exception e) { 87 | // e.printStackTrace(); 88 | // eat it. skip first line in file. 89 | } 90 | 91 | } 92 | 93 | System.out.println("Number parsed: " + idx); 94 | 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/TenFold.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 
28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import org.knowm.datasets.common.business.Bean; 38 | 39 | /** 40 | * @author timmolter 41 | */ 42 | public class TenFold extends Bean { 43 | 44 | private int bagid; 45 | private int fold; 46 | 47 | public int getId() { 48 | 49 | return bagid; 50 | } 51 | 52 | public void setId(int bagid) { 53 | 54 | this.bagid = bagid; 55 | } 56 | 57 | public int getBagid() { 58 | 59 | return bagid; 60 | } 61 | 62 | public void setBagid(int bagid) { 63 | 64 | this.bagid = bagid; 65 | } 66 | 67 | public int getFold() { 68 | 69 | return fold; 70 | } 71 | 72 | public void setFold(int fold) { 73 | 74 | this.fold = fold; 75 | } 76 | 77 | @Override 78 | public String toString() { 79 | 80 | return "TenFold [bagid=" + bagid + ", fold=" + fold + "]"; 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/java/org/knowm/datasets/hjabirdsong/TenFoldDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import org.knowm.datasets.common.business.DatasetsDAO; 38 | import org.knowm.yank.Yank; 39 | 40 | /** 41 | * @author timmolter 42 | */ 43 | public class TenFoldDAO extends DatasetsDAO { 44 | 45 | public static int dropTable() { 46 | 47 | return Yank.execute("DROP TABLE IF EXISTS TEN_FOLD", null); 48 | } 49 | 50 | public static int createTable() { 51 | 52 | return Yank.executeSQLKey("KEY_CREATE_TABLE_TENFOLD", null); 53 | } 54 | 55 | public static int insert(TenFold tenFold) { 56 | 57 | Object[] params = new Object[] { 58 | 59 | // @formatter:off 60 | tenFold.getBagid(), tenFold.getFold() 61 | // @formatter:on 62 | }; 63 | String TEN_FOLD_INSERT = "INSERT INTO TEN_FOLD (bagid, fold) VALUES (?, ?)"; 64 | return Yank.execute(TEN_FOLD_INSERT, params); 65 | } 66 | 67 | public static TenFold selectSingle(int bagid) { 68 | 69 | Object[] params = new Object[] { bagid }; 70 | 71 | String SELECT_SINGLE = "SELECT * FROM TEN_FOLD WHERE bagid = ?"; 72 | 73 | return Yank.queryBean(SELECT_SINGLE, TenFold.class, params); 74 | } 75 | 76 | public static long selectCount() { 77 | 78 | String SELECT_COUNT = "SELECT COUNT(*) FROM TEN_FOLD"; 79 | 80 | return Yank.queryScalar(SELECT_COUNT, Long.class, null); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_HJA_BIRDSONG;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE BIRD_SONGS (bagid INTEGER NOT NULL, labels VARCHAR(256) NOT NULL, wavfilename VARCHAR(256) NOT NULL, wavbytes blob 
NOT NULL, PRIMARY KEY (bagid)) 2 | KEY_CREATE_TABLE_TENFOLD=CREATE CACHED TABLE TEN_FOLD (bagid INTEGER NOT NULL, fold INTEGER NOT NULL, PRIMARY KEY (bagid)) -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE BIRD_SONGS (bagid INTEGER NOT NULL, labels VARCHAR(256) NOT NULL, wavfilename VARCHAR(256) NOT NULL, wavbytes blob NOT NULL, PRIMARY KEY (bagid)) 2 | KEY_CREATE_TABLE_TENFOLD=CREATE TABLE TEN_FOLD (bagid INTEGER NOT NULL, fold INTEGER NOT NULL, PRIMARY KEY (bagid)) -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/test/java/org/knowm/datasets/hjabirdsong/TestHJABirdsongDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.MatcherAssert.assertThat; 39 | 40 | import java.sql.SQLException; 41 | 42 | import org.junit.AfterClass; 43 | import org.junit.BeforeClass; 44 | import org.junit.Ignore; 45 | import org.junit.Test; 46 | import org.knowm.datasets.hjabirdsong.HJABirdSong; 47 | import org.knowm.datasets.hjabirdsong.HJABirdsongDAO; 48 | 49 | /** 50 | * @author timmolter 51 | */ 52 | @Ignore 53 | public class TestHJABirdsongDAO { 54 | 55 | @BeforeClass 56 | public static void setUpDB() { 57 | 58 | HJABirdsongDAO.init(new String[0]); 59 | 60 | } 61 | 62 | @AfterClass 63 | public static void tearDownDB() { 64 | 65 | HJABirdsongDAO.release(); 66 | } 67 | 68 | @Test 69 | public void testSelectCount() { 70 | 71 | long count = HJABirdsongDAO.selectCount(); 72 | assertThat(count, equalTo(548L)); 73 | } 74 | 75 | @Test 76 | public void testSelect() throws SQLException { 77 | 78 | HJABirdSong hJABirdSong = HJABirdsongDAO.selectSingle(3); 79 | assertThat(hJABirdSong.getBagid(), equalTo(3)); 80 | assertThat(hJABirdSong.getLabels(), equalTo("1,3")); 81 | assertThat(hJABirdSong.getLabelsAsArray().get(0), equalTo(1)); 82 | assertThat(hJABirdSong.getLabelsAsArray().get(1), equalTo(3)); 83 | 
assertThat(hJABirdSong.getWavfilename(), equalTo("PC13_20090531_050000_10.wav")); 84 | // System.out.println(hJABirdSong.getWavbytes().length()); 85 | // System.out.println(new String(hJABirdSong.getWavbytes().getBytes(1, 4))); 86 | // assertThat(new String(hJABirdSong.getWavbytes().getBytes(1, 4)), equalTo("test")); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /datasets-hja-birdsong/src/test/java/org/knowm/datasets/hjabirdsong/TestTenFoldDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.hjabirdsong; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.MatcherAssert.assertThat; 39 | 40 | import org.junit.AfterClass; 41 | import org.junit.BeforeClass; 42 | import org.junit.Ignore; 43 | import org.junit.Test; 44 | 45 | /** 46 | * @author timmolter 47 | */ 48 | @Ignore 49 | public class TestTenFoldDAO { 50 | 51 | @BeforeClass 52 | public static void setUpDB() { 53 | 54 | TenFoldDAO.init(new String[0]); 55 | } 56 | 57 | @AfterClass 58 | public static void tearDownDB() { 59 | 60 | TenFoldDAO.release(); 61 | } 62 | 63 | @Test 64 | public void testSelectCount() { 65 | 66 | long count = TenFoldDAO.selectCount(); 67 | assertThat(count, equalTo(548L)); 68 | } 69 | 70 | @Test 71 | public void testSelect() { 72 | 73 | TenFold tenFold = TenFoldDAO.selectSingle(3); 74 | assertThat(tenFold.getBagid(), equalTo(3)); 75 | assertThat(tenFold.getFold(), equalTo(7)); 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /datasets-lshtc4/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets LSHTC4 2 | 3 | [raw data](http://www.kaggle.com/c/lshtc/data) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `LSHTC4.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. 
Download raw data from here: http://www.kaggle.com/c/lshtc/data 13 | 1. Put all files (hierarchy.zip, test-remapped and train-remapped) in the `raw` folder in the project root. 14 | 1. Run `RawData2DB.java` and `RawData2DBHierarchy.java` with program argument `DB_HSQLDB_FILE.properties`. 15 | 1. The database containing the data will appear in `db`. 16 | 1. Manually copy the `LSHTC4.*` files into the `/usr/local/Datasets` folder. There should be three files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | 34 | ## Data Information 35 | 36 | #### Train Sample Row 37 | 38 | Data 39 | 314523, 165538, 76255, 335416, 416827 1:1 2:1 3:1 4:1 5:1 6:1 7:1 8:1 9:1 10:1 11:1 12:2 13:1 14:2 15:1 16:1 17:1 18:1 19:1 20:1 21:1 22:1 23:1 24:5 25:1 26:1 27:1 28:1 29:1 30:3 31:1 32:2 33:1 34:1 35:1 36:3 37:1 38:1 39:1 40:1 41:3 42:1 43:7 44:1 45:1 46:1 47:1 48:1 49:1 50:1 51:2 52:1 53:1 54:1 55:1 56:1 57:3 58:4 59:1 60:2 61:3 62:1 63:1 64:1 65:1 66:1 67:2 68:1 69:1 70:1 71:1 72:1 73:1 74:1 75:2 76:2 77:1 78:1 79:1 80:1 81:1 40 | 41 | #### Test Sample Row 42 | 43 | Id,Data 44 | 1,0 139:1 153:4 199:1 212:1 232:1 282:1 307:3 310:1 428:1 510:1 528:1 609:1 700:2 709:1 727:1 765:1 791:1 798:2 838:1 872:1 1007:1 1170:2 1374:1 1388:1 1409:1 1435:1 1892:1 2190:1 2197:1 2253:1 2348:2 2570:1 2628:1 2713:1
3066:1 3406:1 3619:2 3628:2 3636:1 3649:2 5068:1 8385:1 9371:1 11248:1 11806:1 45 | 46 | 47 | #### Observations 48 | 49 | longestLabelsStringLength: 1344 50 | longestFeaturesStringLength: 47449 51 | highestFeatureID: 1617899 52 | highestFeatureValue: 1700 53 | highestLabelID: 445729 54 | 55 | Train entries: 2,365,437 56 | Test entries: 452,167 57 | Total Entries: 2,817,603 58 | 59 | Total Hierarchy Entries: 863261 60 | 61 | -------------------------------------------------------------------------------- /datasets-lshtc4/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-lshtc4 12 | 13 | Knowm Datasets for machine learning applications 14 | http://knowm.org/open-source/ 15 | 2014 16 | 17 | 18 | Knowm Inc. 19 | http://knowm.org 20 | 21 | 22 | 23 | 24 | org.knowm.datasets 25 | datasets-common 26 | 2.2.0-SNAPSHOT 27 | 28 | 29 | org.hsqldb 30 | hsqldb 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.lshtc4; 36 | 37 | import org.knowm.datasets.common.business.Bean; 38 | 39 | /** Bean for one LSHTC4 record: the id read through getId() plus the labels and features strings. 40 | * @author timmolter 41 | */ 42 | public class LSHTC4 extends Bean { 43 | 44 | private String labels; 45 | private String features; 46 | /** Returns the labels string exactly as stored. */ 47 | public String getLabels() { 48 | 49 | return labels; 50 | } 51 | /** Splits the labels string on commas without trimming the pieces; throws NullPointerException if labels was never set. */ 52 | public String[] getLabelsAsArray() { 53 | 54 | return labels.split(","); 55 | } 56 | /** Stores the labels string. */ 57 | public void setLabels(String labels) { 58 | 59 | this.labels = labels; 60 | } 61 | /** Returns the features string exactly as stored. */ 62 | public String getFeatures() { 63 | 64 | return features; 65 | } 66 | /** Splits the features string on commas without trimming the pieces; throws NullPointerException if features was never set. */ 67 | public String[] getFeaturesAsArray() { 68 | 69 | return features.split(","); 70 | } 71 | /** Stores the features string. */ 72 | public void setFeatures(String features) { 73 | 74 | this.features = features; 75 | } 76 | /** Renders the id, labels and features in a single bracketed string. */ 77 | @Override 78 | public String toString() { 79 | 80 | return "LSHTC4 [id=" + getId() + ", labels=" + labels + ", features=" + features + "]"; 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4DAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.lshtc4; 36 | 37 | import org.knowm.yank.Yank; 38 | 39 | /** Static data-access helpers for the LSHTC4 table, all routed through Yank. 40 | * @author timmolter 41 | */ 42 | public class LSHTC4DAO extends LSHTC4ParentDAO { 43 | /** Executes DROP TABLE IF EXISTS LSHTC4 and returns the int result reported by Yank. */ 44 | public static int dropTable() { 45 | 46 | return Yank.execute("DROP TABLE IF EXISTS LSHTC4", null); 47 | } 48 | /** Returns the hard-coded value 452167. NOTE(review): this equals the test-entry count quoted in the module README; confirm how callers interpret it as a split point. */ 49 | public static int getTrainTestSplit() { 50 | 51 | return 452167; 52 | } 53 | /** Executes the statement registered under the SQL key KEY_CREATE_TABLE and returns the int result reported by Yank. */ 54 | public static int createTable() { 55 | 56 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null); 57 | } 58 | /** Inserts the id, labels and features of the given bean into the LSHTC4 table using a parameterized statement. */ 59 | public static int insert(LSHTC4 lSHTC4) { 60 | 61 | Object[] params = new Object[] { 62 | 63 | // @formatter:off 64 | lSHTC4.getId(), lSHTC4.getLabels(), lSHTC4.getFeatures() 65 | // @formatter:on 66 | }; 67 | String LSHTC4_INSERT = "INSERT INTO LSHTC4 (id, labels, features) VALUES (?, ?, ?)"; 68 | return Yank.execute(LSHTC4_INSERT, params); 69 | 70 | } 71 | /** Queries the LSHTC4 row with the given id and maps it to an LSHTC4 bean. NOTE(review): the result when no row matches is decided by Yank - confirm. */ 72 | public static LSHTC4 selectSingle(int id) { 73 | 74 | Object[] params = new Object[] { id }; 75 | 76 | String SELECT_SINGLE = "SELECT * FROM LSHTC4 WHERE id = ?"; 77 | 78 | return Yank.queryBean(SELECT_SINGLE, LSHTC4.class, params); 79 | } 80 | /** Counts the rows of the LSHTC4 table. NOTE(review): the Long result is auto-unboxed to long, so a null from Yank would throw - confirm that cannot happen. */
81 | public static long selectCount() { 82 | 83 | String SELECT_COUNT = "SELECT COUNT(*) FROM LSHTC4"; 84 | 85 | return Yank.queryScalar(SELECT_COUNT, Long.class, null); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4Hierarchy.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 
28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.lshtc4; 36 | 37 | import org.knowm.datasets.common.business.Bean; 38 | 39 | /** Bean for one hierarchy edge: a parent id and a node id. The id accessors of this class read and write the node id. 40 | * @author timmolter 41 | */ 42 | public class LSHTC4Hierarchy extends Bean { 43 | 44 | int parentid; 45 | int nodeid; 46 | /** Returns the parent id. */ 47 | public int getParentid() { 48 | 49 | return parentid; 50 | } 51 | /** Stores the parent id. */ 52 | public void setParentid(int parentid) { 53 | 54 | this.parentid = parentid; 55 | } 56 | /** Returns nodeid, so the id of this bean is its node id. NOTE(review): presumably replaces an accessor declared in Bean - confirm. */ 57 | public int getId() { 58 | 59 | return nodeid; 60 | } 61 | /** Stores the given value in nodeid; same effect as setNodeid. */ 62 | public void setId(int nodeid) { 63 | 64 | this.nodeid = nodeid; 65 | } 66 | /** Returns the node id. */ 67 | public int getNodeid() { 68 | 69 | return nodeid; 70 | } 71 | /** Stores the node id. */ 72 | public void setNodeid(int nodeid) { 73 | 74 | this.nodeid = nodeid; 75 | } 76 | /** Renders the parent id and node id in a single bracketed string. */ 77 | @Override 78 | public String toString() { 79 | 80 | return "LSHTC4Hierarchy [parentid=" + parentid + ", nodeid=" + nodeid + "]"; 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4HierarchyDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.lshtc4; 36 | 37 | import java.util.List; 38 | 39 | import org.knowm.yank.Yank; 40 | 41 | /** Static data-access helpers for the LSHTC4Hierarchy table, all routed through Yank. 42 | * @author timmolter 43 | */ 44 | public class LSHTC4HierarchyDAO extends LSHTC4ParentDAO { 45 | /** Executes DROP TABLE IF EXISTS LSHTC4Hierarchy and returns the int result reported by Yank. */ 46 | public static int dropTable() { 47 | 48 | return Yank.execute("DROP TABLE IF EXISTS LSHTC4Hierarchy", null); 49 | } 50 | /** Executes the statement registered under the SQL key KEY_CREATE_TABLE_HIERARCHY and returns the int result reported by Yank. */ 51 | public static int createTable() { 52 | 53 | return Yank.executeSQLKey("KEY_CREATE_TABLE_HIERARCHY", null); 54 | } 55 | /** Inserts the parent id and node id of the given bean into the LSHTC4Hierarchy table using a parameterized statement. */ 56 | public static int insert(LSHTC4Hierarchy lSHTC4Hierarchy) { 57 | 58 | Object[] params = new Object[] { 59 | 60 | // @formatter:off 61 | lSHTC4Hierarchy.getParentid(), lSHTC4Hierarchy.getNodeid() 62 | // @formatter:on 63 | }; 64 | String LSHTC4Hierarchy_INSERT = "INSERT INTO LSHTC4Hierarchy (parentid, nodeid) VALUES (?, ?)"; 65 | return Yank.execute(LSHTC4Hierarchy_INSERT, params); 66 | 67 | } 68 | /** Loads every row of the LSHTC4Hierarchy table as LSHTC4Hierarchy beans; the whole table is returned in one list. */ 69 | public static List selectAll() { 70 | 71 | String SELECT_ALL = "SELECT * FROM LSHTC4Hierarchy"; 72 | 73 | return Yank.queryBeanList(SELECT_ALL, LSHTC4Hierarchy.class, null); 74 | } 75 | /** Queries the row whose nodeid equals the given id and maps it to a bean. NOTE(review): the table declares no key on nodeid, so which row is returned for duplicates is decided by Yank - confirm. */ 76 | public static LSHTC4Hierarchy selectSingle(int id) { 77 | 78 | Object[] params = new Object[] { id }; 79 | 80 | String SELECT_SINGLE = "SELECT * FROM LSHTC4Hierarchy WHERE nodeid = ?"; 81 | 82 | return Yank.queryBean(SELECT_SINGLE, LSHTC4Hierarchy.class, params); 83 | } 84 | /** Counts the rows of the LSHTC4Hierarchy table. NOTE(review): the Long result is auto-unboxed to long, so a null from Yank would throw - confirm that cannot happen. */ 85 | public static long selectCount() { 86 | 87 | String SELECT_COUNT = "SELECT COUNT(*) FROM LSHTC4Hierarchy"; 88 | 89 | return Yank.queryScalar(SELECT_COUNT, Long.class, null); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/java/org/knowm/datasets/lshtc4/LSHTC4ParentDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.lshtc4; 36 | 37 | import org.knowm.datasets.common.business.DatasetsDAO; 38 | 39 | /** Shared parent of the LSHTC4 DAO classes; it only supplies the database initialisation entry point. 40 | * @author timmolter 41 | */ 42 | public class LSHTC4ParentDAO extends DatasetsDAO { 43 | /** Initialises the DB_LSHTC4 database for the given directory by delegating to the seven-argument init with three hard-coded file IDs, null in the sixth position and true as the last argument. NOTE(review): the meaning of the null and the boolean is defined in DatasetsDAO, which is not visible here - confirm. */ 44 | public static void init(String dataFilesDir) { 45 | 46 | String dataFileID = "0ByP7_A9vXm17bFdZVzEyMWhsVFk"; 47 | String propsFileID = "0ByP7_A9vXm17Zkl0ajF6LXlrYUE"; 48 | String scriptFileID = "0ByP7_A9vXm17ZnljLU1ybS16c2c"; 49 | 50 | init("DB_LSHTC4", dataFilesDir, dataFileID, propsFileID, scriptFileID, null, true); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_LSHTC4;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE LSHTC4 (id INTEGER NOT NULL, labels VARCHAR(1344) NOT NULL, features VARCHAR(47449) NOT NULL, PRIMARY KEY (id)) 2 | KEY_CREATE_TABLE_HIERARCHY=CREATE CACHED TABLE LSHTC4Hierarchy (parentid INTEGER NOT NULL, nodeid INTEGER NOT NULL) -------------------------------------------------------------------------------- /datasets-lshtc4/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE LSHTC4 (id INTEGER NOT NULL, labels VARCHAR(1344) NOT NULL, features VARCHAR(47449) NOT NULL, PRIMARY KEY (id)) 2 | KEY_CREATE_TABLE_HIERARCHY=CREATE TABLE LSHTC4Hierarchy (parentid INTEGER NOT NULL, nodeid INTEGER NOT NULL) -------------------------------------------------------------------------------- 
/datasets-lshtc4/src/test/java/org/knowm/datasets/lshtc4/TestLSHTC4HierarchyDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.lshtc4; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.MatcherAssert.assertThat; 39 | 40 | import java.util.List; 41 | 42 | import org.junit.AfterClass; 43 | import org.junit.BeforeClass; 44 | import org.junit.Ignore; 45 | import org.junit.Test; 46 | import org.knowm.datasets.lshtc4.LSHTC4Hierarchy; 47 | import org.knowm.datasets.lshtc4.LSHTC4HierarchyDAO; 48 | 49 | /** Tests for LSHTC4HierarchyDAO that expect a populated database; the whole class is disabled through the Ignore annotation. 50 | * @author timmolter 51 | */ 52 | @Ignore 53 | public class TestLSHTC4HierarchyDAO { 54 | /** Initialises the DAO once for the class, passing an empty String array to init. */ 55 | @BeforeClass 56 | public static void setUpDB() { 57 | 58 | LSHTC4HierarchyDAO.init(new String[0]); 59 | 60 | } 61 | /** Releases the DAO resources once all tests of the class have run. */ 62 | @AfterClass 63 | public static void tearDownDB() { 64 | 65 | LSHTC4HierarchyDAO.release(); 66 | } 67 | /** Expects the hierarchy table to hold exactly 863261 rows. */ 68 | @Test 69 | public void testSelectCount() { 70 | 71 | long count = LSHTC4HierarchyDAO.selectCount(); 72 | assertThat(count, equalTo(863261L)); 73 | 74 | } 75 | /** Expects selectAll to return 863261 beans and node 2214730 to have parent 2244783. */ 76 | @Test 77 | public void test() { 78 | 79 | List all = LSHTC4HierarchyDAO.selectAll(); 80 | assertThat(all.size(), equalTo(863261)); 81 | 82 | // System.out.println(all.get(0)); 83 | LSHTC4Hierarchy lSHTC4Hierarchy = LSHTC4HierarchyDAO.selectSingle(2214730); 84 | int parentID = lSHTC4Hierarchy.getParentid(); 85 | assertThat(parentID, equalTo(2244783)); 86 | 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /datasets-mnist/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets MNIST 2 | 3 | [raw data](http://yann.lecun.com/exdb/mnist/) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_MNIST.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. 
Download raw data from here: http://yann.lecun.com/exdb/mnist/ 13 | 1. Put all files in the `raw` folder in the project root. 14 | 1. Unzip all of them. 15 | 1. Run `RawData2DB.java`. 16 | 1. The database containing the data will appear in `db`. 17 | 1. Manually copy the `DB_MNIST.*` files into the `/usr/local/Datasets` folder. There should be four files. 18 | 19 | ## How to Use the Data in Your App 20 | 21 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 22 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 23 | 24 | 25 | try { 26 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 27 | BreastCancerDemo demo = new BreastCancerDemo(); 28 | demo.go(); 29 | } catch (Exception e) { 30 | e.printStackTrace(); 31 | } finally { 32 | BreastCancerDAO.release(); // release data resources 33 | } 34 | -------------------------------------------------------------------------------- /datasets-mnist/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | org.knowm.datasets 8 | datasets-parent 9 | 2.2.0-SNAPSHOT 10 | 11 | 12 | datasets-mnist 13 | 14 | Knowm Datasets for machine learning applications 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 
20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | org.knowm.datasets 26 | datasets-common 27 | 2.2.0-SNAPSHOT 28 | 29 | 30 | org.hsqldb 31 | hsqldb 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /datasets-mnist/src/main/java/mnist/tools/MnistLabelFile.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | * this software and associated documentation files (the "Software"), to deal in 6 | * the Software without restriction, including without limitation the rights to 7 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 8 | * of the Software, and to permit persons to whom the Software is furnished to do 9 | * so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | * SOFTWARE. 21 | */ 22 | package mnist.tools; 23 | 24 | import java.io.FileNotFoundException; 25 | import java.io.IOException; 26 | 27 | /** 28 | * MNIST database label file: an MnistDbFile whose records are single label bytes. 29 | */ 30 | public class MnistLabelFile extends MnistDbFile { 31 | 32 | /** 33 | * Creates a new MNIST database label file ready for reading by passing both arguments to the superclass constructor. 
34 | * 35 | * @param name the system-dependent filename 36 | * @param mode the access mode 37 | * @throws IOException if the superclass constructor reports an I/O failure 38 | * @throws FileNotFoundException if the superclass constructor cannot find or open the file 39 | */ 40 | public MnistLabelFile(String name, String mode) throws FileNotFoundException, IOException { 41 | 42 | super(name, mode); 43 | } 44 | 45 | /** 46 | * Reads one unsigned byte at the current position and returns it as the label. 47 | * 48 | * @return integer representing the label 49 | * @throws IOException if reading the byte fails 50 | */ 51 | public int readLabel() throws IOException { 52 | 53 | return readUnsignedByte(); 54 | } 55 | /** Returns 2049, the magic number this label-file class reports. NOTE(review): presumably compared against the file header by MnistDbFile - confirm. */ 56 | @Override 57 | protected int getMagicNumber() { 58 | 59 | return 2049; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /datasets-mnist/src/main/java/org/knowm/datasets/mnist/Mnist.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | *

Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam 5 | * LLC (http://xeiam.com) and contributors. 6 | * 7 | *

Permission is hereby granted, free of charge, to any person obtaining a copy of this software 8 | * and associated documentation files (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute, 10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | *

The above copyright notice and this permission notice shall be included in all copies or 14 | * substantial portions of the Software. 15 | * 16 | *

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | /** 23 | * This product currently only contains code developed by authors of specific components, as 24 | * identified by the source code files. 25 | * 26 | *

Since product implements StAX API, it has dependencies to StAX API classes. 27 | * 28 | *

For additional credits (generally to people who reported problems) see CREDITS file. 29 | */ 30 | package org.knowm.datasets.mnist; 31 | 32 | import java.awt.image.BufferedImage; 33 | import java.sql.Blob; 34 | import java.sql.SQLException; 35 | 36 | import org.knowm.datasets.common.business.Bean; 37 | 38 | /** Bean for one MNIST digit: an integer label plus the 28x28 image bytes, held both as a Blob and as a cached byte array. Pixel bytes are treated as unsigned 0..255 values when rendered - TODO confirm against MnistManager. @author timmolter */ 39 | public class Mnist extends Bean { 40 | 41 | private int label; 42 | private Blob imgbytes; 43 | private byte[] imageAsByteArray; 44 | /** Returns the label. */ 45 | public int getLabel() { 46 | 47 | return label; 48 | } 49 | /** Stores the label. */ 50 | public void setLabel(int label) { 51 | 52 | this.label = label; 53 | } 54 | /** Returns the image Blob as last set. */ 55 | public Blob getImgbytes() { 56 | 57 | return imgbytes; 58 | } 59 | /** Stores the Blob and copies its first 28 * 28 bytes into the byte-array cache; an SQLException is printed and the cache is left unchanged. */ 60 | public void setImgbytes(Blob imgbytes) { 61 | 62 | this.imgbytes = imgbytes; 63 | try { 64 | this.imageAsByteArray = imgbytes.getBytes(1, (28 * 28)); 65 | } catch (SQLException e) { 66 | e.printStackTrace(); 67 | } 68 | } 69 | /** Returns the pixels as a 28x28 matrix indexed [x][y]; the cached array is read row by row (index 28 * y + x). Values are the raw signed bytes. */ 70 | public byte[][] getImageMatrix() { 71 | 72 | byte[][] imageMatrix = new byte[28][28]; 73 | for (int y = 0; y < 28; y++) { 74 | for (int x = 0; x < 28; x++) { 75 | imageMatrix[x][y] = imageAsByteArray[28 * y + x]; 76 | } 77 | } 78 | 79 | return imageMatrix; 80 | } 81 | /** Renders the digit as a 28x28 gray image. Each byte is masked with 0xFF first, because a pixel above 127 is a negative Java byte and would otherwise sign-extend into the red and green channels. */ 82 | public BufferedImage getImageAsBufferedImage() { 83 | 84 | byte[][] img = getImageMatrix(); 85 | BufferedImage bufferedImage = new BufferedImage(28, 28, BufferedImage.TYPE_INT_RGB); 86 | 87 | for (int x = 0; x < img.length; x++) { 88 | for (int y = 0; y < img[0].length; y++) { 89 | int gray = img[x][y] & 0xFF; int value = gray << 16 | gray << 8 | gray; 90 | bufferedImage.setRGB(x, y, value); 91 | } 92 | } 93 | return bufferedImage; 94 | } 95 | /** Renders the digit as text, one line per image row: two asterisks for a pixel whose unsigned value exceeds 10, a blank otherwise. Lines follow y and columns follow x so the text is not transposed, and pixels are compared unsigned so bright pixels are not dropped. */ 96 | public String toASCIIImageString() { 97 | 98 | byte[][] img = getImageMatrix(); 99 | 100 | StringBuilder sb = new StringBuilder(); 101 | for (int y = 0; y < img[0].length; y++) { 102 | for (int x = 0; x < img.length; x++) { 103 | if ((img[x][y] & 0xFF) > 10) { 104 | sb.append("**"); 105 | } else { 106 | sb.append(" "); 107 | } 108 | } 109 | 
sb.append(System.getProperty("line.separator")); 110 | } 111 | return sb.toString(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /datasets-mnist/src/main/java/org/knowm/datasets/mnist/MnistDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 
31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.mnist; 36 | 37 | import java.util.List; 38 | 39 | import org.knowm.datasets.common.business.DatasetsDAO; 40 | import org.knowm.yank.Yank; 41 | 42 | /** Static data-access helpers for the MNIST table, all routed through Yank. 43 | * @author timmolter 44 | */ 45 | public class MnistDAO extends DatasetsDAO { 46 | /** Initialises the DB_MNIST database for the given directory by delegating to the seven-argument init with four hard-coded file IDs (data, props, script, lobs) and true as the last argument. NOTE(review): the meaning of the boolean is defined in DatasetsDAO, which is not visible here - confirm. */ 47 | public static void init(String dataFilesDir) { 48 | 49 | String dataFileID = "0ByP7_A9vXm17V1NvVzdOY2dURUU"; 50 | String propsFileID = "0ByP7_A9vXm17cGRudjlMQXpTY1U"; 51 | String scriptFileID = "0ByP7_A9vXm17Qm5rMzlEa2VrUm8"; 52 | String lobsFileID = "0ByP7_A9vXm17Sml5dXdrSXozUjQ"; 53 | 54 | init("DB_MNIST", dataFilesDir, dataFileID, propsFileID, scriptFileID, lobsFileID, true); 55 | } 56 | /** Executes DROP TABLE IF EXISTS MNIST and returns the int result reported by Yank. */ 57 | public static int dropTable() { 58 | 59 | return Yank.execute("DROP TABLE IF EXISTS MNIST", null); 60 | } 61 | /** Returns the hard-coded value 60000. NOTE(review): this equals the number of training images loaded by RawData2DB; confirm how callers interpret it as a split point. */ 62 | public static int getTrainTestSplit() { 63 | 64 | return 60000; 65 | } 66 | /** Executes the statement registered under the SQL key KEY_CREATE_TABLE and returns the int result reported by Yank. */ 67 | public static int createTable() { 68 | 69 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null); 70 | } 71 | /** Inserts the id, label and image Blob of the given bean into the MNIST table using a parameterized statement. NOTE(review): the local is named CENSUS_INCOME_INSERT although it holds the MNIST insert statement. */ 72 | public static int insert(Mnist mnist) { 73 | 74 | Object[] params = new Object[]{ 75 | 76 | // @formatter:off 77 | mnist.getId(), mnist.getLabel(), mnist.getImgbytes() 78 | // @formatter:on 79 | }; 80 | String CENSUS_INCOME_INSERT = "INSERT INTO MNIST (id, label, imgbytes) VALUES (?, ?, ?)"; 81 | return Yank.execute(CENSUS_INCOME_INSERT, params); 82 | 83 | } 84 | /** Loads every row of the MNIST table as Mnist beans; the whole table is returned in one list. */ 85 | public static List selectAll() { 86 | 87 | String SELECT_ALL = "SELECT * FROM MNIST"; 88 | 89 | return Yank.queryBeanList(SELECT_ALL, Mnist.class, null); 90 | } 91 | /** Queries the MNIST row with the given id and maps it to a Mnist bean. NOTE(review): the result when no row matches is decided by Yank - confirm. */ 92 | public static Mnist selectSingle(int id) { 93 | 94 | Object[] params = new Object[]{id}; 95 | 96 | String SELECT_SINGLE = "SELECT * FROM MNIST WHERE id = ?"; 97 | 98 | return Yank.queryBean(SELECT_SINGLE, Mnist.class, params); 99 | } 100 | /** Counts the rows of the MNIST table. NOTE(review): the Long result is auto-unboxed to long, so a null from Yank would throw - confirm that cannot happen. */ 101 | public static long selectCount() { 102 | 103 | String SELECT_COUNT = "SELECT COUNT(*) FROM MNIST"; 104 | 105 | 
return Yank.queryScalar(SELECT_COUNT, Long.class, null); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /datasets-mnist/src/main/java/org/knowm/datasets/mnist/RawData2DB.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 
31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.mnist; 36 | 37 | import java.io.IOException; 38 | import java.sql.SQLException; 39 | import java.util.Arrays; 40 | 41 | import javax.sql.rowset.serial.SerialBlob; 42 | 43 | import mnist.tools.MnistManager; 44 | 45 | /** 46 | * @author timmolter 47 | */ 48 | public class RawData2DB { 49 | 50 | int idx = 0; 51 | 52 | public static void main(String[] args) throws IOException, SQLException { 53 | 54 | MnistDAO.init(new String[0]); 55 | 56 | MnistDAO.dropTable(); 57 | MnistDAO.createTable(); 58 | 59 | RawData2DB dp = new RawData2DB(); 60 | System.out.println("processing MNIST training images..."); 61 | dp.go("./raw/train-images-idx3-ubyte", "./raw/train-labels-idx1-ubyte", 1, 60000); 62 | System.out.println("processing MNIST test images..."); 63 | dp.go("./raw/t10k-images-idx3-ubyte", "./raw/t10k-labels-idx1-ubyte", 1, 10000); 64 | System.out.println("done."); 65 | 66 | MnistDAO.release(); 67 | } 68 | 69 | private void go(String imageDataFile, String labelDataFile, int startIdx, int endIdx) throws IOException, SQLException { 70 | 71 | int longestStringLength = 0; 72 | 73 | MnistManager mnistManager = new MnistManager(imageDataFile, labelDataFile); 74 | for (int n = startIdx; n <= endIdx; n++) { 75 | mnistManager.setCurrent(n); // index of the image that we are interested in 76 | 77 | byte[] imageAsSignedByteArray = mnistManager.readImageAsSignedByteArray(); 78 | System.out.println("imageAsByteArray = " + Arrays.toString(imageAsSignedByteArray)); 79 | 80 | Mnist mnist = new Mnist(); 81 | mnist.setId(idx++); 82 | mnist.setLabel(mnistManager.readLabel()); 83 | mnist.setImgbytes(new SerialBlob(imageAsSignedByteArray)); 84 | // MnistDAO.insert(mnist); 85 | } 86 | // System.out.println("longestStringLength: " + longestStringLength); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- 
/datasets-mnist/src/main/java/org/knowm/datasets/mnist/tools/MnistDigitViewer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | * this software and associated documentation files (the "Software"), to deal in 6 | * the Software without restriction, including without limitation the rights to 7 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 8 | * of the Software, and to permit persons to whom the Software is furnished to do 9 | * so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | * SOFTWARE. 
21 | */ 22 | package org.knowm.datasets.mnist.tools; 23 | 24 | import java.awt.BorderLayout; 25 | 26 | import javax.swing.JFrame; 27 | import javax.swing.JPanel; 28 | 29 | /** 30 | * @author timmolter 31 | */ 32 | public class MnistDigitViewer extends JFrame { 33 | 34 | public MnistDigitViewer(JPanel jPanel, String title) { 35 | 36 | add(jPanel, BorderLayout.CENTER); 37 | pack(); 38 | 39 | setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); 40 | setLocationRelativeTo(null); 41 | setTitle(title); 42 | setResizable(false); 43 | setVisible(true); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /datasets-mnist/src/main/java/org/knowm/datasets/mnist/tools/MnistImagePanel.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2014 Xeiam LLC http://xeiam.com 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | * this software and associated documentation files (the "Software"), to deal in 6 | * the Software without restriction, including without limitation the rights to 7 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 8 | * of the Software, and to permit persons to whom the Software is furnished to do 9 | * so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.knowm.datasets.mnist.tools;

import java.awt.Dimension;
import java.awt.Graphics;
import java.awt.geom.AffineTransform;
import java.awt.image.AffineTransformOp;
import java.awt.image.BufferedImage;

import javax.swing.JPanel;

import org.knowm.datasets.mnist.Mnist;

/**
 * Panel that paints one MNIST digit image at its top-left corner.
 *
 * @author alexnugent
 */
public class MnistImagePanel extends JPanel {

  /** The image drawn by {@link #paintComponent(Graphics)}. */
  private BufferedImage bufferedImage;

  /**
   * Takes the image from {@code mnistData} and sizes the panel to match it.
   *
   * @param mnistData source of the image
   */
  public MnistImagePanel(Mnist mnistData) {

    bufferedImage = mnistData.getImageAsBufferedImage();

    setPreferredSize(new Dimension(bufferedImage.getWidth(), bufferedImage.getHeight()));
  }

  /**
   * Replaces the image with a bilinear-interpolated copy enlarged by an integer factor and
   * updates the preferred size to match.
   *
   * @param scale magnification factor, must be positive
   * @throws IllegalArgumentException if {@code scale} is not positive
   */
  private void scale(int scale) {

    if (scale <= 0) {
      throw new IllegalArgumentException("scale must be positive: " + scale);
    }

    // The destination must have the scaled dimensions; with the source dimensions the enlarged
    // image was clipped to its top-left corner.
    int w = bufferedImage.getWidth() * scale;
    int h = bufferedImage.getHeight() * scale;
    BufferedImage after = new BufferedImage(w, h, BufferedImage.TYPE_INT_ARGB);
    AffineTransform at = new AffineTransform();
    at.scale(scale, scale);
    AffineTransformOp scaleOp = new AffineTransformOp(at, AffineTransformOp.TYPE_BILINEAR);
    after = scaleOp.filter(bufferedImage, after);

    this.bufferedImage = after;
    setPreferredSize(new Dimension(w, h));
  }

  /** Paints the background via the superclass, then the image at (0, 0). */
  @Override
  public void paintComponent(Graphics g) {

    super.paintComponent(g);
    g.drawImage(bufferedImage, 0, 0, null); // see javadoc for more info on the parameters
  }

}
--------------------------------------------------------------------------------
/datasets-mnist/src/main/resources/DB_HSQLDB_FILE.properties:
--------------------------------------------------------------------------------
jdbcUrl=jdbc:hsqldb:file:./db/DB_MNIST;shutdown=true;hsqldb.lob_file_scale=1
username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-mnist/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE MNIST (id INTEGER NOT NULL, label INTEGER NOT NULL, imgbytes blob NOT NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-mnist/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE MNIST (id INTEGER NOT NULL, label INTEGER NOT NULL, imagedata VARCHAR(2333) NOT NULL, imgbytes blob NOT NULL, PRIMARY\ 2 | KEY (id)) -------------------------------------------------------------------------------- /datasets-mnist/src/main/resources/simplelogger.properties: -------------------------------------------------------------------------------- 1 | # SLF4J's SimpleLogger configuration file 2 | # Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err. 3 | 4 | # Default logging detail level for all instances of SimpleLogger. 5 | # Must be one of ("trace", "debug", "info", "warn", or "error"). 6 | # If not specified, defaults to "info". 7 | org.slf4j.simpleLogger.defaultLogLevel=debug 8 | 9 | # Logging detail level for a SimpleLogger instance named "xxxxx". 10 | # Must be one of ("trace", "debug", "info", "warn", or "error"). 11 | # If not specified, the default logging detail level is used. 12 | #org.slf4j.simpleLogger.log.xxxxx= 13 | 14 | # Set to true if you want the current date and time to be included in output messages. 15 | # Default is false, and will output the number of milliseconds elapsed since startup. 16 | #org.slf4j.simpleLogger.showDateTime=false 17 | 18 | # The date and time format to be used in the output messages. 
19 | # The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat. 20 | # If the format is not specified or is invalid, the default format is used. 21 | # The default format is yyyy-MM-dd HH:mm:ss:SSS Z. 22 | #org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z 23 | 24 | # Set to true if you want to output the current thread name. 25 | # Defaults to true. 26 | #org.slf4j.simpleLogger.showThreadName=true 27 | 28 | # Set to true if you want the Logger instance name to be included in output messages. 29 | # Defaults to true. 30 | #org.slf4j.simpleLogger.showLogName=true 31 | 32 | # Set to true if you want the last component of the name to be included in output messages. 33 | # Defaults to false. 34 | #org.slf4j.simpleLogger.showShortLogName=true -------------------------------------------------------------------------------- /datasets-mnist/src/test/java/org/knowm/datasets/mnist/unit/TestMnistDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.mnist.unit; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.CoreMatchers.not; 39 | import static org.hamcrest.MatcherAssert.assertThat; 40 | 41 | import org.junit.AfterClass; 42 | import org.junit.BeforeClass; 43 | import org.junit.Ignore; 44 | import org.junit.Test; 45 | import org.knowm.datasets.mnist.Mnist; 46 | import org.knowm.datasets.mnist.MnistDAO; 47 | 48 | /** 49 | * @author timmolter 50 | */ 51 | @Ignore 52 | public class TestMnistDAO { 53 | 54 | @BeforeClass 55 | public static void setUpDB() { 56 | 57 | MnistDAO.init(new String[0]); 58 | } 59 | 60 | @AfterClass 61 | public static void tearDownDB() { 62 | 63 | MnistDAO.release(); 64 | } 65 | 66 | @Test 67 | public void testSelectCount() { 68 | 69 | long count = MnistDAO.selectCount(); 70 | assertThat(count, equalTo(70000L)); 71 | } 72 | 73 | @Test 74 | public void testSelectSingle() { 75 | 76 | Mnist mnist = MnistDAO.selectSingle(2); 77 | assertThat(mnist, not(equalTo(null))); 78 | } 79 | 80 | } 81 | 
-------------------------------------------------------------------------------- /datasets-nsl-kdd/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets NSL-KDD 2 | 3 | [raw data](http://nsl.cs.unb.ca/NSL-KDD/) 4 | 5 | Note that the raw data is no longer available at the link above, but it can be accessed as described [here](https://stackoverflow.com/questions/31722843/unable-to-get-nsl-kdd-datasets). 6 | 7 | ## How to Get the Data 8 | 9 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 10 | 1. Manually copy the `DB_NSL_KDD.*` files into the `/usr/local/Datasets` folder. There should be four files. 11 | 12 | ## How to Generate Data 13 | 14 | 1. Download raw data from here: http://nsl.cs.unb.ca/NSL-KDD/ 15 | 1. Put all files (`KDDTrain+.txt` and `KDDTest+.txt`) in the `raw` folder in the project root. 16 | 1. Run `RawData2DB.java`. 17 | 1. The database containing the data will appear in `db`. 18 | 1. Manually copy the `DB_NSL_KDD.*` files into the `/usr/local/Datasets` folder. There should be three files. 19 | 20 | ## Build jar containing Data 21 | 22 | ## How to Use the Data in Your App 23 | 24 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 25 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module.
26 | 27 | 28 | try { 29 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 30 | BreastCancerDemo demo = new BreastCancerDemo(); 31 | demo.go(); 32 | } catch (Exception e) { 33 | e.printStackTrace(); 34 | } finally { 35 | BreastCancerDAO.release(); // release data resources 36 | } 37 | -------------------------------------------------------------------------------- /datasets-nsl-kdd/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-nsl-kdd 12 | 13 | Knowm Datasets for machine learning applications 14 | http://knowm.org/open-source/ 15 | 2013 16 | 17 | 18 | Knowm Inc. 19 | http://knowm.org 20 | 21 | 22 | 23 | 24 | org.knowm.datasets 25 | datasets-common 26 | 2.2.0-SNAPSHOT 27 | 28 | 29 | org.hsqldb 30 | hsqldb 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /datasets-nsl-kdd/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_NSL_KDD;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-nsl-kdd/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE NSL_KDD (id INTEGER NOT NULL, duration FLOAT, protocol_type VARCHAR(16), service VARCHAR(16), flag VARCHAR(16), src_bytes FLOAT, dst_bytes FLOAT, land VARCHAR(16), wrong_fragment FLOAT, urgent FLOAT, hot FLOAT, num_failed_logins FLOAT, logged_in VARCHAR(16), num_compromised FLOAT, root_shell FLOAT, su_attempted FLOAT, num_root FLOAT, num_file_creations FLOAT, num_shells FLOAT, num_access_files FLOAT, num_outbound_cmds FLOAT, is_host_login VARCHAR(16),
is_guest_login VARCHAR(16), concurrentcount FLOAT, srv_count FLOAT, serror_rate FLOAT, srv_serror_rate FLOAT, rerror_rate FLOAT, srv_rerror_rate FLOAT, same_srv_rate FLOAT, diff_srv_rate FLOAT, srv_diff_host_rate FLOAT, dst_host_count FLOAT, dst_host_srv_count FLOAT, dst_host_same_srv_rate FLOAT, dst_host_diff_srv_rate FLOAT, dst_host_same_src_port_rate FLOAT, dst_host_srv_diff_host_rate FLOAT, dst_host_serror_rate FLOAT, dst_host_srv_serror_rate FLOAT, dst_host_rerror_rate FLOAT, dst_host_srv_rerror_rate FLOAT, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-nsl-kdd/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE NSL_KDD (id INTEGER NOT NULL, duration FLOAT, protocol_type VARCHAR(16), service VARCHAR(16), flag VARCHAR(16), src_bytes FLOAT, dst_bytes FLOAT, land VARCHAR(16), wrong_fragment FLOAT, urgent FLOAT, hot FLOAT, num_failed_logins FLOAT, logged_in VARCHAR(16), num_compromised FLOAT, root_shell FLOAT, su_attempted FLOAT, num_root FLOAT, num_file_creations FLOAT, num_shells FLOAT, num_access_files FLOAT, num_outbound_cmds FLOAT, is_host_login VARCHAR(16), is_guest_login VARCHAR(16), concurrentcount FLOAT, srv_count FLOAT, serror_rate FLOAT, srv_serror_rate FLOAT, rerror_rate FLOAT, srv_rerror_rate FLOAT, same_srv_rate FLOAT, diff_srv_rate FLOAT, srv_diff_host_rate FLOAT, dst_host_count FLOAT, dst_host_srv_count FLOAT, dst_host_same_srv_rate FLOAT, dst_host_diff_srv_rate FLOAT, dst_host_same_src_port_rate FLOAT, dst_host_srv_diff_host_rate FLOAT, dst_host_serror_rate FLOAT, dst_host_srv_serror_rate FLOAT, dst_host_rerror_rate FLOAT, dst_host_srv_rerror_rate FLOAT, PRIMARY KEY
(id)) -------------------------------------------------------------------------------- /datasets-nsl-kdd/src/test/java/org/knowm/datasets/nslkdd/TestNSLKDDDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.nslkdd; 36 | 37 | import static org.hamcrest.CoreMatchers.equalTo; 38 | import static org.hamcrest.MatcherAssert.assertThat; 39 | 40 | import org.junit.AfterClass; 41 | import org.junit.BeforeClass; 42 | import org.junit.Ignore; 43 | import org.junit.Test; 44 | import org.knowm.datasets.nslkdd.NSLKDD; 45 | import org.knowm.datasets.nslkdd.NSLKDDDAO; 46 | 47 | /** 48 | * @author timmolter 49 | */ 50 | @Ignore 51 | public class TestNSLKDDDAO { 52 | 53 | @BeforeClass 54 | public static void setUpDB() { 55 | 56 | NSLKDDDAO.init(new String[0]); 57 | 58 | } 59 | 60 | @AfterClass 61 | public static void tearDownDB() { 62 | 63 | NSLKDDDAO.release(); 64 | } 65 | 66 | @Test 67 | public void testSelectCount() { 68 | 69 | long count = NSLKDDDAO.selectCount(); 70 | assertThat(count, equalTo(148517L)); 71 | } 72 | 73 | @Test 74 | public void testSelectSingle() { 75 | 76 | NSLKDD nSLKDD = NSLKDDDAO.selectSingle(4); 77 | System.out.println(nSLKDD); 78 | assertThat(nSLKDD.getSrc_bytes(), equalTo(199f)); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /datasets-numenta/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets Numenta 2 | 3 | [raw data](https://github.com/numenta/NAB) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_NUMENTA.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to populate the DB with NAB Data 11 | 12 | 1. Pull the "data" and "label" directories from https://github.com/numenta/NAB 13 | 1. put both directories in the `raw` folder in project root 14 | 1. Run `RawData2DB.java` 15 | 1. The database containing the data will appear in `db`. 16 | 1. 
Manually copy the `DB_NUMENTA.*` files into the `/usr/local/Datasets` folder. There should be four files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | -------------------------------------------------------------------------------- /datasets-numenta/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | org.knowm.datasets 8 | datasets-parent 9 | 2.2.0-SNAPSHOT 10 | 11 | 12 | datasets-numenta 13 | 14 | Knowm Datasets for machine learning applications 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | org.knowm.datasets 26 | datasets-common 27 | 2.2.0-SNAPSHOT 28 | 29 | 30 | org.hsqldb 31 | hsqldb 32 | 33 | 34 | com.fasterxml.jackson.core 35 | jackson-core 36 | 37 | 38 | org.codehaus.jackson 39 | jackson-mapper-asl 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /datasets-numenta/src/main/java/org/knowm/datasets/numenta/SeriesPoint.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors.
5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.numenta; 36 | 37 | import org.knowm.datasets.common.business.Bean; 38 | 39 | public class SeriesPoint extends Bean { 40 | 41 | private String seriesGroup; 42 | private String seriesName; 43 | private long timestamp; 44 | private double value; 45 | private int label; 46 | 47 | public String getSeriesGroup() { 48 | return seriesGroup; 49 | } 50 | 51 | public void setSeriesGroup(String seriesGroup) { 52 | this.seriesGroup = seriesGroup; 53 | } 54 | 55 | public String getSeriesName() { 56 | return seriesName; 57 | } 58 | 59 | public void setSeriesName(String seriesName) { 60 | this.seriesName = seriesName; 61 | } 62 | 63 | public long getTimestamp() { 64 | return timestamp; 65 | } 66 | 67 | public void setTimestamp(long timestamp) { 68 | this.timestamp = timestamp; 69 | } 70 | 71 | public double getValue() { 72 | return value; 73 | } 74 | 75 | public void setValue(double value) { 76 | this.value = value; 77 | } 78 | 79 | public int getLabel() { 80 | return label; 81 | } 82 | 83 | public void setLabel(int label) { 84 | this.label = label; 85 | } 86 | 87 | @Override 88 | public String toString() { 89 | 90 | return "SeriesPoint [id=" + getId() + " ," + "seriesGroup=" + seriesGroup + " ," + "seriesName=" + seriesName + " ," + "timestamp=" + timestamp + 91 | " ," + "value=" + value + " ," + "label=" + label 92 | + "]"; 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /datasets-numenta/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_NUMENTA;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-numenta/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE NUMENTA (id BIGINT 
NOT NULL, seriesGroup VARCHAR(500) NOT NULL, seriesName VARCHAR(500) NOT NULL, timestamp BIGINT NOT NULL, value DOUBLE NOT NULL, label TINYINT NOT NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-numenta/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE NUMENTA (id BIGINT NOT NULL, seriesGroup VARCHAR(500) NOT NULL, seriesName VARCHAR(500) NOT NULL, timestamp BIGINT NOT NULL, value DOUBLE NOT NULL, label TINYINT NOT NULL, PRIMARY KEY (id)) -------------------------------------------------------------------------------- /datasets-numenta/src/test/java/org/knowm/datasets/numenta/TestNumentaDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | *

Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. Copyright 2013-2015 Xeiam 5 | * LLC (http://xeiam.com) and contributors. 6 | * 7 | *

Permission is hereby granted, free of charge, to any person obtaining a copy of this software 8 | * and associated documentation files (the "Software"), to deal in the Software without restriction, 9 | * including without limitation the rights to use, copy, modify, merge, publish, distribute, 10 | * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | *

The above copyright notice and this permission notice shall be included in all copies or 14 | * substantial portions of the Software. 15 | * 16 | *

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING 17 | * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | */ 22 | /** 23 | * This product currently only contains code developed by authors of specific components, as 24 | * identified by the source code files. 25 | * 26 | *

Since product implements StAX API, it has dependencies to StAX API classes. 27 | * 28 | *

For additional credits (generally to people who reported problems) see CREDITS file.
 */
package org.knowm.datasets.numenta;

import java.util.List;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.knowm.yank.PropertiesUtils;
import org.knowm.yank.Yank;

/**
 * Database-backed checks for {@code NumentaDAO}. The class is ignored by default. The checks use
 * the Java {@code assert} statement, so they only take effect when assertions are enabled.
 */
@Ignore
public class TestNumentaDAO {

  /** Sets up the default connection pool from {@code DB_HSQLDB_FILE.properties} on the classpath. */
  @BeforeClass
  public static void setUpDB() {

    Yank.setupDefaultConnectionPool(
        PropertiesUtils.getPropertiesFromClasspath("DB_HSQLDB_FILE.properties"));
  }

  /** Releases the database after the last test. */
  @AfterClass
  public static void tearDownDB() {

    NumentaDAO.release();
  }

  /** The table must contain at least one row. */
  @Test
  public void testSelectCount() {

    List<?> points = NumentaDAO.selectAll();
    assert (!points.isEmpty());
  }

  /** Within every series, timestamps must be strictly increasing. */
  @Test
  public void testOrdering() {

    List<String> names = NumentaDAO.selectSeriesNames();
    for (String name : names) {
      long last = Long.MIN_VALUE;
      List<SeriesPoint> points = NumentaDAO.selectSeries(name);
      for (SeriesPoint p : points) {
        assert (p.getTimestamp() > last);
        // Advance the watermark; without this every point was compared against Long.MIN_VALUE
        // and the check could never fail.
        last = p.getTimestamp();
      }
    }
  }

  /** Spot-checks the number of points in a handful of known series. */
  @Test
  public void testSeriesCount() {

    assert (NumentaDAO.selectSeries("TravelTime_387").size() == 2500);
    assert (NumentaDAO.selectSeries("TravelTime_451").size() == 2162);
    assert (NumentaDAO.selectSeries("Twitter_volume_AAPL").size() == 15902);
    assert (NumentaDAO.selectSeries("art_daily_jumpsup").size() == 4032);
    assert (NumentaDAO.selectSeries("ec2_cpu_utilization_825cc2").size() == 4032);
    assert (NumentaDAO.selectSeries("exchange_2_cpm_results").size() == 1624);
    assert (NumentaDAO.selectSeries("speed_6005").size() == 2500);
  }

  /** Prints every series name and requires that at least one exists. */
  @Test
  public void testSeriesNames() {

    List<String> names = NumentaDAO.selectSeriesNames();
    for (String name : names) {
      System.out.println(name);
    }
    assert (!names.isEmpty());
  }
}
-------------------------------------------------------------------------------- /datasets-pcb/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets PCB 2 | 3 | [raw data](https://www.caa.tuwien.ac.at/cvl/research/cvl-databases/pcb-dslr-dataset/) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_PCB.*` files into the `/usr/local/Datasets` folder. There should be four files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. Download raw data from here: https://www.caa.tuwien.ac.at/cvl/research/cvl-databases/pcb-dslr-dataset/ 13 | 1. Unzip the download and put all of the extracted folders into a `raw` folder in the project root. 14 | 1. Run `RawData2DB.java`. 15 | 1. The database containing the data will appear in `db`. 16 | 1. Manually copy the `DB_PCB.*` files into the `/usr/local/Datasets` folder. There should be four files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 
22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | -------------------------------------------------------------------------------- /datasets-pcb/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-pcb 12 | 13 | Knowm Datasets for machine learning applications 14 | http://knowm.org/open-source/ 15 | 2013 16 | 17 | 18 | Knowm Inc. 19 | http://knowm.org 20 | 21 | 22 | 23 | 24 | org.knowm.datasets 25 | datasets-common 26 | 2.2.0-SNAPSHOT 27 | 28 | 29 | org.hsqldb 30 | hsqldb 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCB.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.pcb; 36 | 37 | import java.sql.Blob; 38 | 39 | import org.knowm.datasets.common.business.Bean; 40 | 41 | /** 42 | * @author timmolter 43 | */ 44 | public class PCB extends Bean { 45 | 46 | private Blob imgbytes; 47 | 48 | public Blob getImgbytes() { 49 | return imgbytes; 50 | } 51 | 52 | public void setImgbytes(Blob imgbytes) { 53 | this.imgbytes = imgbytes; 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | return "PCB [id=" + getId() + "]"; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCBAnnotation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.pcb; 36 | 37 | /** 38 | * @author timmolter 39 | */ 40 | public class PCBAnnotation { 41 | 42 | private int pcbid; 43 | private int id; 44 | private float x; 45 | private float y; 46 | private float width; 47 | private float height; 48 | private float rotation; 49 | private String name; 50 | 51 | public int getPcbid() { 52 | return pcbid; 53 | } 54 | 55 | public void setPcbid(int pcbid) { 56 | this.pcbid = pcbid; 57 | } 58 | 59 | public int getId() { 60 | return id; 61 | } 62 | 63 | public void setId(int id) { 64 | this.id = id; 65 | } 66 | 67 | public float getX() { 68 | return x; 69 | } 70 | 71 | public void setX(float x) { 72 | this.x = x; 73 | } 74 | 75 | public float getY() { 76 | return y; 77 | } 78 | 79 | public void setY(float y) { 80 | this.y = y; 81 | } 82 | 83 | public float getWidth() { 84 | return width; 85 | } 86 | 87 | public void setWidth(float width) { 88 | this.width = width; 89 | } 90 | 91 | public float getHeight() { 92 | return height; 93 | } 94 | 95 | public void setHeight(float height) { 96 | this.height = height; 97 | } 98 | 99 | public float getRotation() { 100 | return rotation; 101 | } 102 | 103 | public void setRotation(float rotation) { 104 | this.rotation = rotation; 105 | } 106 | 107 | public String getName() { 108 | return name; 109 | } 110 | 111 | public void setName(String name) { 112 | this.name = name; 113 | } 114 | 115 | @Override 116 | public String toString() { 117 | return "PCBAnnotation [pcbid=" + pcbid + ", id=" + id + ", x=" + x + ", y=" + y + ", width=" + width + ", height=" + height + ", rotation=" 118 | + rotation + ", name=" + name + "]"; 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCBDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. 
(http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.pcb; 36 | 37 | import org.knowm.yank.Yank; 38 | 39 | /** 40 | * @author timmolter 41 | */ 42 | public class PCBDAO extends PCBParentDAO { 43 | 44 | public static int dropTable() { 45 | 46 | return Yank.execute("DROP TABLE IF EXISTS PCB", null); 47 | } 48 | 49 | public static int createTable() { 50 | 51 | return Yank.executeSQLKey("KEY_CREATE_TABLE", null); 52 | } 53 | 54 | public static int insert(PCB pcb) { 55 | 56 | Object[] params = new Object[] { 57 | 58 | // @formatter:off 59 | pcb.getId(), pcb.getImgbytes() 60 | // @formatter:on 61 | }; 62 | String PCB_INSERT = "INSERT INTO PCB (id, imgbytes) VALUES (?, ?)"; 63 | return Yank.execute(PCB_INSERT, params); 64 | 65 | } 66 | 67 | public static PCB selectSingle(int id) { 68 | 69 | Object[] params = new Object[] { id }; 70 | 71 | String SELECT_SINGLE = "SELECT * FROM PCB WHERE id = ?"; 72 | 73 | return Yank.queryBean(SELECT_SINGLE, PCB.class, params); 74 | } 75 | 76 | public static long selectCount() { 77 | 78 | String SELECT_COUNT = "SELECT COUNT(*) FROM PCB"; 79 | 80 | return Yank.queryScalar(SELECT_COUNT, Long.class, null); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /datasets-pcb/src/main/java/org/knowm/datasets/pcb/PCBParentDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.pcb; 36 | 37 | import org.knowm.datasets.common.business.DatasetsDAO; 38 | 39 | /** 40 | * @author timmolter 41 | */ 42 | public class PCBParentDAO extends DatasetsDAO { 43 | 44 | /** Initializes access to the {@code DB_PCB} database by delegating to the inherited seven-argument {@code init}. @param dataFilesDir directory handed through unchanged as the data-files location */ public static void init(String dataFilesDir) { 45 | 46 | /* NOTE(review): these look like remote file IDs for the DB_PCB data, properties, script and lobs files; confirm how DatasetsDAO.init uses them */ String dataFileID = "0ByP7_A9vXm17SWZJa09fWnFxbGM"; 47 | String propsFileID = "0ByP7_A9vXm17RS1NMllKelJ0MlE"; 48 | String scriptFileID = "0ByP7_A9vXm17YXlZelRxV01ZdDg"; 49 | String lobsFileID = "0ByP7_A9vXm17WDBoS25pMHZmR0E"; 50 | 51 | /* TODO confirm: the meaning of the trailing 'true' flag is defined by the inherited init */ init("DB_PCB", dataFilesDir, dataFileID, propsFileID, scriptFileID, lobsFileID, true); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /datasets-pcb/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_PCB;shutdown=true 2 | username=sa 3 | password= 4 | -------------------------------------------------------------------------------- /datasets-pcb/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE PCB (id INTEGER NOT NULL, imgbytes blob NOT NULL, PRIMARY KEY (id)) 2 | KEY_CREATE_TABLE_PCB_ANNOTATIONS=CREATE CACHED TABLE PCB_ANNOTATIONS (pcbid INTEGER NOT NULL, id INTEGER NOT NULL, x FLOAT NOT NULL, y FLOAT NOT NULL, width FLOAT NOT NULL, height FLOAT NOT NULL, rotation FLOAT NOT NULL, name VARCHAR(1000) NULL, CONSTRAINT PRI PRIMARY KEY ( pcbid, id )) -------------------------------------------------------------------------------- /datasets-pcb/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE PCB (id INTEGER NOT NULL, imgbytes mediumblob NOT NULL, PRIMARY KEY (id)) 2 | KEY_CREATE_TABLE_PCB_ANNOTATIONS=CREATE TABLE PCB_ANNOTATIONS (pcbid INTEGER NOT NULL, id INTEGER NOT NULL, x FLOAT NOT 
NULL, y FLOAT NOT NULL, width FLOAT NOT NULL, height FLOAT NOT NULL, rotation FLOAT NOT NULL, name VARCHAR(1000) NULL, PRIMARY KEY (`pcbid`, `id`)) -------------------------------------------------------------------------------- /datasets-pcb/src/test/java/org/knowm/datasets/pcb/TestPCBDAO.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 
31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.pcb; 36 | 37 | import static org.fest.assertions.api.Assertions.assertThat; 38 | 39 | import java.awt.image.BufferedImage; 40 | import java.io.InputStream; 41 | import java.util.List; 42 | 43 | import javax.imageio.ImageIO; 44 | 45 | import org.junit.AfterClass; 46 | import org.junit.BeforeClass; 47 | import org.junit.Ignore; 48 | import org.junit.Test; 49 | 50 | /** 51 | * @author timmolter 52 | */ 53 | @Ignore 54 | public class TestPCBDAO { 55 | 56 | @BeforeClass 57 | public static void setUpDB() { 58 | 59 | PCBDAO.init(new String[0]); 60 | PCBAnnotationDAO.init(new String[0]); 61 | } 62 | 63 | @AfterClass 64 | public static void tearDownDB() { 65 | 66 | PCBDAO.release(); 67 | PCBAnnotationDAO.release(); 68 | } 69 | 70 | @Test 71 | public void testSelectCount() { 72 | 73 | long count = PCBDAO.selectCount(); 74 | assertThat(count).isEqualTo(165L); 75 | } 76 | 77 | @Test 78 | public void testImage() { 79 | 80 | PCB pcb = PCBDAO.selectSingle(1); 81 | 82 | try (InputStream bytes = pcb.getImgbytes().getBinaryStream();) { 83 | 84 | BufferedImage bufferedImage = ImageIO.read(bytes); 85 | assertThat(bufferedImage).isNotNull(); 86 | 87 | } catch (Exception e) { 88 | e.printStackTrace(); 89 | } 90 | } 91 | 92 | @Test 93 | public void testAnnotations() { 94 | 95 | long count = PCBAnnotationDAO.selectCount(); 96 | System.out.println("" + count); 97 | 98 | List pcbAnnotations = PCBAnnotationDAO.selectList(1); 99 | 100 | assertThat(pcbAnnotations).hasSize(23); 101 | 102 | for (PCBAnnotation pcbAnnotation : pcbAnnotations) { 103 | 104 | System.out.println(pcbAnnotation.toString()); 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /datasets-reuters-21578/README.md: -------------------------------------------------------------------------------- 1 | ## Knowm Datasets 
Reuters-21578 2 | 3 | [raw data](http://archive.ics.uci.edu/ml/support/Reuters-21578+Text+Categorization+Collection) 4 | 5 | ## How to Get the Data 6 | 7 | 1. Manually download the database files that have been uploaded to Knowm's Google Drive account [here](https://drive.google.com/folderview?id=0ByP7_A9vXm17VXhuZzBrcnNubEE&usp=sharing#list). 8 | 1. Manually copy the `DB_REUTERS_21578.*` files into the `/usr/local/Datasets` folder. There should be three files. 9 | 10 | ## How to Generate Data 11 | 12 | 1. Download raw data (`reuters21578.tar.gz`) from here: http://archive.ics.uci.edu/ml/machine-learning-databases/reuters21578-mld/ 13 | 1. Unzip it and put all of the files into a `raw` folder in the project root. 14 | 1. Run `RawData2DB.java`. 15 | 1. The database containing the data will appear in `db`. 16 | 1. Manually copy the `DB_REUTERS_21578.*` files into the `/usr/local/Datasets` folder. There should be three files. 17 | 18 | ## How to Use the Data in Your App 19 | 20 | 1. Put the desired jar on the classpath of your application. This will give you access to the DAO and Bean classes. 21 | 1. Just call `init` on the DAO class you are using, passing it the path to the database files that you downloaded or generated above. When finished with the data, call `release`. The following example is for the BreastCancer dataset. For more examples see the `datasets-samples` module. 
22 | 23 | 24 | try { 25 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 26 | BreastCancerDemo demo = new BreastCancerDemo(); 27 | demo.go(); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } finally { 31 | BreastCancerDAO.release(); // release data resources 32 | } 33 | -------------------------------------------------------------------------------- /datasets-reuters-21578/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | org.knowm.datasets 8 | datasets-parent 9 | 2.2.0-SNAPSHOT 10 | 11 | 12 | datasets-reuters-21578 13 | 14 | Knowm Datasets for machine learning applications 15 | http://knowm.org/open-source/ 16 | 2013 17 | 18 | 19 | Knowm Inc. 20 | http://knowm.org 21 | 22 | 23 | 24 | 25 | org.knowm.datasets 26 | datasets-common 27 | 2.2.0-SNAPSHOT 28 | 29 | 30 | org.hsqldb 31 | hsqldb 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /datasets-reuters-21578/src/main/resources/DB_HSQLDB_FILE.properties: -------------------------------------------------------------------------------- 1 | jdbcUrl=jdbc:hsqldb:file:./db/DB_REUTERS_21578;shutdown=true 2 | username=sa 3 | password= -------------------------------------------------------------------------------- /datasets-reuters-21578/src/main/resources/SQL_HSQLDB.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE CACHED TABLE REUTERS_21578 (NEWID INTEGER NOT NULL, OLDID INTEGER NOT NULL, TOPICSBOOL TINYINT NULL, LEWISSPLIT VARCHAR(256) NULL, CGISPLIT VARCHAR(256) NULL, DATE TIME NULL, TOPICS VARCHAR(256) NULL, PLACES VARCHAR(256) NULL, PEOPLE VARCHAR(256) NULL, ORGS VARCHAR(256) NULL, EXCHANGES VARCHAR(256) NULL, COMPANIES VARCHAR(256) NULL, TITLE VARCHAR(256) NULL, DATELINE VARCHAR(256) NULL, BODY VARCHAR(13500) NULL, PRIMARY KEY (NEWID)) 
-------------------------------------------------------------------------------- /datasets-reuters-21578/src/main/resources/SQL_MYSQL.properties: -------------------------------------------------------------------------------- 1 | KEY_CREATE_TABLE=CREATE TABLE REUTERS_21578 (NEWID INTEGER NOT NULL, OLDID INTEGER NOT NULL, TOPICSBOOL TINYINT NULL, LEWISSPLIT VARCHAR(256) NULL, CGISPLIT VARCHAR(256) NULL, DATE TIME NULL, TOPICS VARCHAR(256) NULL, PLACES VARCHAR(256) NULL, PEOPLE VARCHAR(256) NULL, ORGS VARCHAR(256) NULL, EXCHANGES VARCHAR(256) NULL, COMPANIES VARCHAR(256) NULL, TITLE VARCHAR(256) NULL, DATELINE VARCHAR(256) NULL, BODY VARCHAR(13500) NULL, PRIMARY KEY (NEWID)) -------------------------------------------------------------------------------- /datasets-samples/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | org.knowm.datasets 7 | datasets-parent 8 | 2.2.0-SNAPSHOT 9 | 10 | 11 | datasets-samples 12 | 13 | 14 | 15 | org.knowm.datasets 16 | datasets-common 17 | 18 | 19 | org.hsqldb 20 | hsqldb 21 | compile 22 | 23 | 24 | org.knowm.datasets 25 | datasets-breast-cancer-wisconsin-original 26 | 2.2.0-SNAPSHOT 27 | 28 | 29 | org.knowm.datasets 30 | datasets-hja-birdsong 31 | 2.2.0-SNAPSHOT 32 | 33 | 34 | org.knowm.datasets 35 | datasets-census-income 36 | 2.2.0-SNAPSHOT 37 | 38 | 39 | org.knowm.datasets 40 | datasets-cifar10 41 | 2.2.0-SNAPSHOT 42 | 43 | 44 | org.knowm.datasets 45 | datasets-lshtc4 46 | 2.2.0-SNAPSHOT 47 | 48 | 49 | org.knowm.datasets 50 | datasets-mnist 51 | 2.2.0-SNAPSHOT 52 | 53 | 54 | org.knowm.datasets 55 | datasets-reuters-21578 56 | 2.2.0-SNAPSHOT 57 | 58 | 59 | org.knowm.datasets 60 | datasets-nsl-kdd 61 | 2.2.0-SNAPSHOT 62 | 63 | 64 | org.knowm.datasets 65 | datasets-ucsd-anomaly 66 | 2.2.0-SNAPSHOT 67 | 68 | 69 | org.knowm.datasets 70 | datasets-higgs-boson 71 | 2.2.0-SNAPSHOT 72 | 73 | 74 | org.knowm.datasets 75 | datasets-pcb 76 | 2.2.0-SNAPSHOT 77 | 78 | 79 | 80 | 
81 | 82 | -------------------------------------------------------------------------------- /datasets-samples/src/main/java/org/knowm/datasets/samples/BreastCancerDataInspector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.samples; 36 | 37 | import java.util.ArrayList; 38 | import java.util.List; 39 | 40 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancer; 41 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancerDAO; 42 | import org.knowm.datasets.common.data.HistogramDataInspector; 43 | 44 | /** 45 | * @author timmolter 46 | */ 47 | public class BreastCancerDataInspector extends HistogramDataInspector { 48 | 49 | private List malignant = new ArrayList(); 50 | private List benign = new ArrayList(); 51 | 52 | public static void main(String[] args) { 53 | 54 | try { 55 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 56 | // TRAIN_TEST_SPLIT = EventDAO.selectCount() * 4 / 5; 57 | 58 | BreastCancerDataInspector breastCancerDataInspector = new BreastCancerDataInspector(); 59 | breastCancerDataInspector.generateTrainAndTestArraysTruncated(); 60 | breastCancerDataInspector.go(); 61 | 62 | } catch (Exception e) { 63 | e.printStackTrace(); 64 | } finally { 65 | BreastCancerDAO.release(); // release data resources 66 | } 67 | } 68 | 69 | private void generateTrainAndTestArraysTruncated() { 70 | 71 | int numSamples = (int) BreastCancerDAO.selectCount(); 72 | for (int i = 0; i < numSamples; i++) { 73 | 74 | BreastCancer breastCancer = BreastCancerDAO.selectSingle(i); 75 | if (breastCancer.getCellClass() == 4) { 76 | malignant.add(breastCancer); 77 | } else { 78 | benign.add(breastCancer); 79 | } 80 | } 81 | } 82 | 83 | @Override 84 | public List getFirstSamples() { 85 | 86 | return malignant; 87 | } 88 | 89 | @Override 90 | public List getSecondSamples() { 91 | 92 | return benign; 93 | } 94 | 95 | @Override 96 | public String getFirstLabel() { 97 | 98 | return "Malignant"; 99 | } 100 | 101 | @Override 102 | public String getSecondLabel() { 103 | 104 | return "Benign"; 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- 
/datasets-samples/src/main/java/org/knowm/datasets/samples/BreastCancerDemo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.samples; 36 | 37 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancer; 38 | import org.knowm.datasets.breastcancerwisconsinorginal.BreastCancerDAO; 39 | 40 | /** 41 | * @author timmolter 42 | */ 43 | public class BreastCancerDemo { 44 | 45 | public static void main(String[] args) { 46 | 47 | try { 48 | BreastCancerDAO.init("/usr/local/Datasets/"); // setup data 49 | BreastCancerDemo demo = new BreastCancerDemo(); 50 | demo.go(); 51 | } catch (Exception e) { 52 | e.printStackTrace(); 53 | } finally { 54 | BreastCancerDAO.release(); // release data resources 55 | } 56 | } 57 | 58 | private void go() { 59 | 60 | // print number of objects 61 | long count = BreastCancerDAO.selectCount(); 62 | System.out.println("count= " + count); 63 | 64 | // loop through train objects 65 | for (int i = 0; i < BreastCancerDAO.getTrainTestSplit(); i++) { 66 | BreastCancer breastCancer = BreastCancerDAO.selectSingle(i); 67 | System.out.println(breastCancer.toString()); 68 | } 69 | 70 | // loop through test objects 71 | for (int i = BreastCancerDAO.getTrainTestSplit(); i < count; i++) { 72 | BreastCancer breastCancer = BreastCancerDAO.selectSingle(i); 73 | System.out.println(breastCancer.toString()); 74 | } 75 | 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /datasets-samples/src/main/java/org/knowm/datasets/samples/CensusIncomeDemo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.samples; 36 | 37 | import org.knowm.datasets.censusincome.CensusIncome; 38 | import org.knowm.datasets.censusincome.CensusIncomeDAO; 39 | 40 | /** 41 | * @author timmolter 42 | */ 43 | public class CensusIncomeDemo { 44 | 45 | public static void main(String[] args) { 46 | 47 | try { 48 | CensusIncomeDAO.init("/usr/local/Datasets"); // setup data 49 | CensusIncomeDemo demo = new CensusIncomeDemo(); 50 | demo.go(); 51 | } catch (Exception e) { 52 | e.printStackTrace(); 53 | } finally { 54 | CensusIncomeDAO.release(); // release data resources 55 | } 56 | } 57 | 58 | private void go() { 59 | 60 | // print number of objects 61 | long count = CensusIncomeDAO.selectCount(); 62 | System.out.println("count= " + count); 63 | 64 | // loop through train objects 65 | for (int i = 0; i < CensusIncomeDAO.getTrainTestSplit(); i++) { 66 | CensusIncome censusIncome = CensusIncomeDAO.selectSingle(i); 67 | System.out.println(censusIncome.toString()); 68 | } 69 | 70 | // loop through test objects 71 | for (int i = CensusIncomeDAO.getTrainTestSplit(); i < count; i++) { 72 | CensusIncome censusIncome = CensusIncomeDAO.selectSingle(i); 73 | System.out.println(censusIncome.toString()); 74 | } 75 | 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /datasets-samples/src/main/java/org/knowm/datasets/samples/Cifar10Demo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 
34 | */ 35 | package org.knowm.datasets.samples; 36 | 37 | import org.knowm.datasets.cifar10.Cifar; 38 | import org.knowm.datasets.cifar10.CifarDAO; 39 | 40 | /** 41 | * @author timmolter 42 | */ 43 | public class Cifar10Demo { 44 | 45 | public static void main(String[] args) { 46 | 47 | try { 48 | CifarDAO.init("/usr/local/Datasets"); // setup data 49 | Cifar10Demo demo = new Cifar10Demo(); 50 | demo.go(); 51 | } catch (Exception e) { 52 | e.printStackTrace(); 53 | } finally { 54 | CifarDAO.release(); // release data resources 55 | } 56 | } 57 | 58 | private void go() { 59 | 60 | // print number of objects 61 | long count = CifarDAO.selectCount(); 62 | System.out.println("count= " + count); 63 | 64 | // loop through train objects 65 | for (int i = 0; i < CifarDAO.getTrainTestSplit(); i++) { 66 | Cifar cifar = CifarDAO.selectSingle(i); 67 | System.out.println(cifar.toString()); 68 | } 69 | 70 | // loop through test objects 71 | for (int i = CifarDAO.getTrainTestSplit(); i < count; i++) { 72 | Cifar cifar = CifarDAO.selectSingle(i); 73 | System.out.println(cifar.toString()); 74 | } 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /datasets-samples/src/main/java/org/knowm/datasets/samples/CifarImageDisplayApp.java: -------------------------------------------------------------------------------- 1 | /** 2 | * (The MIT License) 3 | * 4 | * Copyright 2015-2017 Knowm Inc. (http://knowm.org) and contributors. 5 | * Copyright 2013-2015 Xeiam LLC (http://xeiam.com) and contributors. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | /** 26 | * This product currently only contains code developed by authors 27 | * of specific components, as identified by the source code files. 28 | * 29 | * Since product implements StAX API, it has dependencies to StAX API 30 | * classes. 31 | * 32 | * For additional credits (generally to people who reported problems) 33 | * see CREDITS file. 34 | */ 35 | package org.knowm.datasets.samples; 36 | 37 | import javax.swing.JPanel; 38 | 39 | import org.knowm.datasets.cifar10.Cifar; 40 | import org.knowm.datasets.cifar10.CifarDAO; 41 | import org.knowm.datasets.cifar10.CifarImagePanel; 42 | import org.knowm.datasets.cifar10.CifarViewer; 43 | 44 | /** 45 | * @author alexnugent 46 | */ 47 | public class CifarImageDisplayApp { 48 | 49 | /** 50 | * This app takes the following arguments: 51 | *