├── .gitignore ├── palmetto ├── src │ ├── main │ │ ├── resources │ │ │ ├── cc │ │ │ │ └── mallet │ │ │ │ │ └── util │ │ │ │ │ └── resources │ │ │ │ │ └── logging.properties │ │ │ ├── log4j.properties │ │ │ └── license_template.txt │ │ └── java │ │ │ └── org │ │ │ └── aksw │ │ │ └── palmetto │ │ │ ├── io │ │ │ ├── DocumentTextSupplier.java │ │ │ ├── SimpleDocumentTextSupplier.java │ │ │ ├── GoldStandardReader.java │ │ │ └── SimpleWordSetReader.java │ │ │ ├── evaluate │ │ │ └── correlation │ │ │ │ └── RankCorrelationCalculator.java │ │ │ ├── prob │ │ │ ├── bd │ │ │ │ └── BooleanDocumentFrequencyDeterminer.java │ │ │ ├── decorator │ │ │ │ ├── SlidingWindowFrequencyDeterminerDecorator.java │ │ │ │ ├── FrequencyDeterminerDecorator.java │ │ │ │ ├── AbstractFrequencyDeterminerDecorator.java │ │ │ │ ├── AbstractSlidingWindowFrequencyDeterminerDecorator.java │ │ │ │ └── SimpleFrequencyCachingDeterminerDecorator.java │ │ │ ├── window │ │ │ │ ├── WindowBasedFrequencyDeterminer.java │ │ │ │ └── WindowBasedProbabilityEstimator.java │ │ │ ├── FrequencyDeterminer.java │ │ │ ├── AbstractProbabilitySupplier.java │ │ │ └── ProbabilityEstimator.java │ │ │ ├── corpus │ │ │ ├── CorpusAdapter.java │ │ │ ├── lucene │ │ │ │ ├── SlidingWindowCounter.java │ │ │ │ ├── creation │ │ │ │ │ └── IndexableDocument.java │ │ │ │ └── SimpleAnalyzer.java │ │ │ └── WindowSupportingAdapter.java │ │ │ ├── calculations │ │ │ ├── indirect │ │ │ │ ├── VectorBasedConfirmationMeasure.java │ │ │ │ ├── CosinusConfirmationMeasure.java │ │ │ │ ├── DiceConfirmationMeasure.java │ │ │ │ ├── JaccardConfirmationMeasure.java │ │ │ │ └── CentroidConfirmationMeasure.java │ │ │ ├── ConfirmationMeasure.java │ │ │ └── direct │ │ │ │ ├── LogBasedCalculation.java │ │ │ │ ├── DirectConfirmationMeasure.java │ │ │ │ ├── JointProbabilityConfirmationMeasure.java │ │ │ │ ├── AbstractUndefinedResultHandlingConfirmationMeasure.java │ │ │ │ ├── DifferenceBasedConfirmationMeasure.java │ │ │ │ ├── JaccardConfirmationMeasure.java │ │ │ │ 
└── LogJaccardConfirmationMeasure.java │ │ │ ├── weight │ │ │ ├── Weighter.java │ │ │ ├── EqualWeighter.java │ │ │ ├── ConditionalProbabilityBasedWeighter.java │ │ │ ├── MarginalProbabilityBasedWeighter.java │ │ │ ├── CompleteProbabilityBasedWeighter.java │ │ │ └── WordSetSizeBasedWeighter.java │ │ │ ├── subsets │ │ │ ├── Segmentator.java │ │ │ ├── SetSet.java │ │ │ ├── OneAll.java │ │ │ ├── OneSet.java │ │ │ ├── AllAll.java │ │ │ ├── AllOne.java │ │ │ ├── OnePreceding.java │ │ │ ├── OneSucceeding.java │ │ │ ├── OneOne.java │ │ │ └── OneOneAndSelf.java │ │ │ ├── data │ │ │ ├── SubsetVectors.java │ │ │ └── SubsetProbabilities.java │ │ │ ├── Coherence.java │ │ │ ├── aggregation │ │ │ ├── ArithmeticMean.java │ │ │ ├── Min.java │ │ │ ├── Max.java │ │ │ ├── QuadraticMean.java │ │ │ ├── Aggregation.java │ │ │ ├── GeometricMean.java │ │ │ ├── HarmonicMean.java │ │ │ └── Median.java │ │ │ └── vector │ │ │ ├── VectorCreator.java │ │ │ ├── AbstractVectorCreator.java │ │ │ └── ProbabilityBasedVectorCreator.java │ └── test │ │ └── java │ │ └── org │ │ └── aksw │ │ └── palmetto │ │ ├── subsets │ │ ├── AllAllTest.java │ │ ├── OneAllTest.java │ │ ├── AllOneTest.java │ │ ├── OneOneTest.java │ │ ├── OneSetTest.java │ │ ├── OneOneAndSelfTest.java │ │ ├── OnePrecedingTest.java │ │ ├── OneSubsequentTest.java │ │ └── OneAnyTest.java │ │ ├── sum │ │ ├── MaxTest.java │ │ ├── MinTest.java │ │ ├── MedianTest.java │ │ ├── HarmonicMeanTest.java │ │ ├── ArithmeticMeanTest.java │ │ ├── GeometricMeanTest.java │ │ ├── AbstractSummarizationTest.java │ │ └── QuadraticMeanTest.java │ │ ├── calculations │ │ ├── indirect │ │ │ ├── AbstractVectorBasedCalculationTest.java │ │ │ ├── AbstractVectorBasedCoherenceTest.java │ │ │ └── VectorCreationTest.java │ │ └── direct │ │ │ ├── AbstractProbabilityBasedCalculationTest.java │ │ │ └── OlssonsCoherenceCalculationTest.java │ │ ├── vector │ │ └── AbstractProbCalcBasedVectorCreatorTest.java │ │ ├── evaluate │ │ ├── correlation │ │ │ ├── KendallsTauTest.java │ 
│ │ └── SpearmanTest.java │ │ └── rank │ │ │ └── RankerTest.java │ │ └── prob │ │ └── AbstractBooleanDocumentSupportingAdapterBasedTest.java └── README.md ├── webApp ├── src │ ├── main │ │ ├── webapp │ │ │ ├── images │ │ │ │ ├── esf.jpg │ │ │ │ ├── dice_logo.jpg │ │ │ │ ├── logo-aksw.png │ │ │ │ ├── flag_of_Europe.jpg │ │ │ │ └── palmetto_64px.png │ │ │ ├── css │ │ │ │ ├── images │ │ │ │ │ ├── ui-icons_222222_256x240.png │ │ │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ │ │ ├── ui-icons_454545_256x240.png │ │ │ │ │ ├── ui-icons_888888_256x240.png │ │ │ │ │ ├── ui-icons_cd0a0a_256x240.png │ │ │ │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ │ │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ │ │ │ ├── ui-bg_highlight-soft_55_fbf9ee_1x100.png │ │ │ │ │ ├── ui-bg_highlight-soft_65_ffffff_1x100.png │ │ │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ │ │ ├── ui-bg_highlight-soft_75_dadada_1x100.png │ │ │ │ │ ├── ui-bg_highlight-soft_75_e6e6e6_1x100.png │ │ │ │ │ └── ui-bg_highlight-soft_95_fef1ec_1x100.png │ │ │ │ └── palmetto.css │ │ │ └── WEB-INF │ │ │ │ └── web.xml │ │ ├── resources │ │ │ └── palmetto.properties │ │ └── java │ │ │ └── org │ │ │ └── aksw │ │ │ └── palmetto │ │ │ └── webapp │ │ │ └── config │ │ │ └── PalmettoConfiguration.java │ └── test │ │ └── java │ │ └── org │ │ └── aksw │ │ └── palmetto │ │ └── webapp │ │ └── PerformanceTest.java ├── Dockerfile ├── README.md ├── Makefile └── tomcat-users.xml ├── Makefile ├── .travis.yml ├── .editorconfig └── .github └── workflows └── maven.yml /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | .classpath 3 | .project 4 | target 5 | .settings 6 | # Package Files # 7 | *.war 8 | *.ear 9 | -------------------------------------------------------------------------------- /palmetto/src/main/resources/cc/mallet/util/resources/logging.properties: -------------------------------------------------------------------------------- 1 | handlers= 
org.slf4j.bridge.SLF4JBridgeHandler 2 | -------------------------------------------------------------------------------- /webApp/src/main/webapp/images/esf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/images/esf.jpg -------------------------------------------------------------------------------- /webApp/src/main/webapp/images/dice_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/images/dice_logo.jpg -------------------------------------------------------------------------------- /webApp/src/main/webapp/images/logo-aksw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/images/logo-aksw.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/images/flag_of_Europe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/images/flag_of_Europe.jpg -------------------------------------------------------------------------------- /webApp/src/main/webapp/images/palmetto_64px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/images/palmetto_64px.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: build 2 | 3 | build: 4 | cd palmetto && mvn clean install 5 | cd webApp && make build 6 | 7 | dockerize: 8 | cd webApp && make dockerize 9 | 10 | 
-------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /webApp/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tomcat:9.0.65-jre8 2 | 3 | VOLUME /usr/src/indexes/ 4 | 5 | RUN rm -rf /usr/local/tomcat/webapps/* 6 | COPY target/palmetto-webapp.war /usr/local/tomcat/webapps/ROOT.war 7 | -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_highlight-soft_55_fbf9ee_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_highlight-soft_55_fbf9ee_1x100.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_highlight-soft_65_ffffff_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_highlight-soft_65_ffffff_1x100.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png 
-------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_highlight-soft_75_dadada_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_highlight-soft_75_dadada_1x100.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_highlight-soft_75_e6e6e6_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_highlight-soft_75_e6e6e6_1x100.png -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/images/ui-bg_highlight-soft_95_fef1ec_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dice-group/Palmetto/HEAD/webApp/src/main/webapp/css/images/ui-bg_highlight-soft_95_fef1ec_1x100.png -------------------------------------------------------------------------------- /palmetto/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Direct log messages to stdout 2 | log4j.rootLogger=WARN,stdout 3 | 4 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 5 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - <%m>%n 7 | 8 | log4j.category.org.aksw.palmetto=INFO -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | before_install: 4 | - cd palmetto 5 | - sudo apt-get install jq 6 | - sudo apt-get install snapd 7 | - sudo snap install jq 8 | - wget -O 
~/codacy-coverage-reporter-assembly-latest.jar "https://github.com/codacy/codacy-coverage-reporter/releases/download/4.0.5/codacy-coverage-reporter-4.0.5-assembly.jar" 9 | 10 | after_success: 11 | - mvn jacoco:report 12 | - java -jar ~/codacy-coverage-reporter-assembly-latest.jar report -l Java -r target/site/jacoco/jacoco.xml 13 | -------------------------------------------------------------------------------- /webApp/src/main/resources/palmetto.properties: -------------------------------------------------------------------------------- 1 | # The path to the Lucene index containing the wikipedia 2 | org.aksw.palmetto.webapp.resources.AbstractCoherenceResource.indexPath=../indexes/wikipedia_bd 3 | # Number of words a topic can have 4 | org.aksw.palmetto.webapp.resources.AbstractCoherenceResource.maxWords=10 5 | # Maximum size of the lucene cache 6 | org.aksw.palmetto.corpus.lucene.CachingWindowSupportingLuceneCorpusAdapter.maxCacheSize=100 7 | # The window sizes of the different coherences 8 | org.aksw.palmetto.webapp.resources.CAResource.windowSize=5 9 | org.aksw.palmetto.webapp.resources.CPResource.windowSize=70 10 | org.aksw.palmetto.webapp.resources.CVResource.windowSize=110 11 | org.aksw.palmetto.webapp.resources.NPMIResource.windowSize=10 12 | org.aksw.palmetto.webapp.resources.UCIResource.windowSize=10 13 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig: http://EditorConfig.org 2 | # top-most EditorConfig file 3 | root = true 4 | 5 | # all files defaults 6 | [*] 7 | # Unix-style newlines with a newline ending 8 | end_of_line = lf 9 | insert_final_newline = true 10 | # Set default charset 11 | charset = utf-8 12 | # 4 space indentation 13 | indent_style = space 14 | indent_size = 4 15 | # trim whitespaces 16 | trim_trailing_whitespace = true 17 | # always insert final newline 18 | insert_final_newline = true 19 
| 20 | [*.md] 21 | trim_trailing_whitespace = false 22 | 23 | # tab for makefiles 24 | [{Makefile, Makefile*, *.xml}] 25 | indent_style = tab 26 | indent_size = 4 27 | 28 | # 2 spaces for Dockerfiles 29 | [{Dockerfile, Dockerfile*}] 30 | indent_style = space 31 | indent_size = 2 32 | 33 | # 2 spaces for md, yaml, ttl, etc.. 34 | [*.{md,yml,iml,json,ttl,ts,js,html,css}] 35 | indent_style = space 36 | indent_size = 2 37 | -------------------------------------------------------------------------------- /webApp/README.md: -------------------------------------------------------------------------------- 1 | Palmetto 2 | ======== 3 | Palmetto is a quality measuring tool for topics 4 | 5 | This is the implementation of a simple web service wrapping Palmetto. 6 | 7 | Palmetto from DICE is licensed under an AGPL v3.0 License. 8 | 9 | ### Docker 10 | 11 | Palmetto can be used as a docker container. The container can be built and run from the `webApp` directory. 12 | 13 | ``` 14 | docker build -t palmetto . 15 | docker run -p 7777:8080 -d -m 4G palmetto 16 | ``` 17 | 18 | After that there is a Tomcat listening on port 7777. The demo application can be accessed using `http://localhost:7777`. 19 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/io/DocumentTextSupplier.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.io; 19 | 20 | @Deprecated 21 | public interface DocumentTextSupplier { 22 | 23 | public String getNextDocumentText(); 24 | } 25 | -------------------------------------------------------------------------------- /webApp/Makefile: -------------------------------------------------------------------------------- 1 | default: build 2 | 3 | build: 4 | mvn clean package 5 | 6 | Major=0 7 | Minor=1 8 | Patch=5 9 | 10 | dockerize: 11 | docker build -t dicegroup/palmetto-service:latest . 12 | docker tag dicegroup/palmetto-service:latest dicegroup/palmetto-service:$(Major) 13 | docker tag dicegroup/palmetto-service:latest dicegroup/palmetto-service:$(Major).$(Minor) 14 | docker tag dicegroup/palmetto-service:latest dicegroup/palmetto-service:$(Major).$(Minor).$(Patch) 15 | 16 | push: 17 | docker push dicegroup/palmetto-service:latest 18 | docker push dicegroup/palmetto-service:$(Major) 19 | docker push dicegroup/palmetto-service:$(Major).$(Minor) 20 | docker push dicegroup/palmetto-service:$(Major).$(Minor).$(Patch) 21 | 22 | dockerize-with-index: 23 | docker build -t dicegroup/palmetto-service:en-wiki -f Dockerfile.en-wiki . 
24 | docker tag dicegroup/palmetto-service:en-wiki dicegroup/palmetto-service:$(Major)-en-wiki 25 | docker tag dicegroup/palmetto-service:en-wiki dicegroup/palmetto-service:$(Major).$(Minor)-en-wiki 26 | docker tag dicegroup/palmetto-service:en-wiki dicegroup/palmetto-service:$(Major).$(Minor).$(Patch)-en-wiki 27 | 28 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/evaluate/correlation/RankCorrelationCalculator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.evaluate.correlation; 19 | 20 | public interface RankCorrelationCalculator { 21 | 22 | public double calculateRankCorrelation(final double x[], final double y[]); 23 | } 24 | -------------------------------------------------------------------------------- /palmetto/src/main/resources/license_template.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) ${year} ${owner} (${email}) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /palmetto/README.md: -------------------------------------------------------------------------------- 1 | Palmetto 2 | ======== 3 | Palmetto is a quality measuring tool for topics 4 | 5 | This is the implementation of coherence calculations for evaluating the quality of topics. 
If you want to learn more about coherence calculations and their meaning for topic evaluation, take a look at the project homepage - especially at the publications. 6 | 7 | Palmetto from DICE is licensed under an AGPL v3.0 License. 8 | 9 | Please take a look at the wikipage to read how Palmetto can be used. 10 | 11 | If you are using Palmetto for an experiment or something similar that leads to a publication, please cite the paper "Exploring the Space of Topic Coherence Measures" that you can find on the project website. A link to the project website is welcome as well :) 12 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/bd/BooleanDocumentFrequencyDeterminer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.prob.bd; 19 | 20 | import org.aksw.palmetto.prob.FrequencyDeterminer; 21 | 22 | 23 | public interface BooleanDocumentFrequencyDeterminer extends FrequencyDeterminer { 24 | 25 | public int getNumberOfDocuments(); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/corpus/CorpusAdapter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.corpus; 19 | 20 | /** 21 | * This is the interface for a corpus adapter - a component that makes word counts available to the system. 22 | * 23 | * @author m.roeder 24 | * 25 | */ 26 | public interface CorpusAdapter { 27 | 28 | public void close(); 29 | } 30 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/decorator/SlidingWindowFrequencyDeterminerDecorator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob.decorator; 19 | 20 | import org.aksw.palmetto.prob.window.WindowBasedFrequencyDeterminer; 21 | 22 | public interface SlidingWindowFrequencyDeterminerDecorator extends FrequencyDeterminerDecorator, 23 | WindowBasedFrequencyDeterminer { 24 | 25 | } 26 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/decorator/FrequencyDeterminerDecorator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob.decorator; 19 | 20 | import org.aksw.palmetto.prob.FrequencyDeterminer; 21 | 22 | public interface FrequencyDeterminerDecorator extends FrequencyDeterminer { 23 | 24 | public FrequencyDeterminer getDeterminer(); 25 | 26 | public void setDeterminer(FrequencyDeterminer determiner); 27 | } 28 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/indirect/VectorBasedConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | import org.aksw.palmetto.calculations.ConfirmationMeasure; 21 | import org.aksw.palmetto.data.SubsetVectors; 22 | 23 | public interface VectorBasedConfirmationMeasure extends ConfirmationMeasure { 24 | 25 | public abstract double[] calculateConfirmationValues(SubsetVectors subsetVectors); 26 | } 27 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/ConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations; 19 | 20 | /** 21 | * The general interface of a confirmation measure. 22 | * 23 | * @author Michael Röder 24 | * 25 | */ 26 | public interface ConfirmationMeasure { 27 | 28 | /** 29 | * Returns the name of the measure. 30 | * 31 | * @return the name of the measure. 
32 | */ 33 | public String getName(); 34 | } 35 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/direct/LogBasedCalculation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | /** 21 | * This interface defines the epsilon used by log based calculations for 22 | * avoiding the logarithm of 0. 23 | * 24 | * @author Michael Röder 25 | * 26 | */ 27 | public interface LogBasedCalculation { 28 | 29 | /** 30 | * e={@value} 31 | */ 32 | public static final double EPSILON = 1.0E-12; 33 | } 34 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/window/WindowBasedFrequencyDeterminer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob.window; 19 | 20 | import org.aksw.palmetto.prob.FrequencyDeterminer; 21 | 22 | public interface WindowBasedFrequencyDeterminer extends FrequencyDeterminer { 23 | 24 | public void setWindowSize(int windowSize); 25 | 26 | public long[] getCooccurrenceCounts(); 27 | 28 | public String getSlidingWindowModelName(); 29 | 30 | public int getWindowSize(); 31 | } 32 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/AllAllTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | @Deprecated 23 | public class AllAllTest extends AbstractSegmentatorTest { 24 | 25 | @Test 26 | public void testWordSetLength4() { 27 | int expectedSegments[] = new int[] { 15 }; 28 | int expectedConditions[][] = new int[][] { { 15 } }; 29 | testSubsetCreator(4, new AllAll(), expectedSegments, expectedConditions); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/weight/Weighter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.weight; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | /** 23 | * This is an interface for a class that can be used to weight the single elements of a segmentation scheme. 24 | * 25 | * @author m.roeder 26 | * 27 | */ 28 | @Deprecated 29 | public interface Weighter { 30 | 31 | public double[] createWeights(SubsetProbabilities probabilities); 32 | 33 | public String getName(); 34 | } 35 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OneAllTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class OneAllTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 1, 2, 4, 8 }; 27 | int expectedConditions[][] = new int[][] { { 14 }, { 13 }, { 11 }, 28 | { 7 } }; 29 | 30 | testSubsetCreator(4, new OneAll(), expectedSegments, expectedConditions); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/AllOneTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class AllOneTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 14, 13, 11, 7 }; 27 | int expectedConditions[][] = new int[][] { { 1 }, { 2 }, { 4 }, { 8 } }; 28 | testSubsetCreator(4, new AllOne(), expectedSegments, expectedConditions); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/Segmentator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | /** 23 | * A {@link Segmentator} generates the {@link SegmentationDefinition} object for a 24 | * word set with the given size. 
25 | * 26 | * @author Michael Röder 27 | * 28 | */ 29 | public interface Segmentator { 30 | 31 | public SegmentationDefinition getSubsetDefinition(int wordsetSize); 32 | 33 | public String getName(); 34 | } 35 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OneOneTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class OneOneTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 1, 2, 4, 8 }; 27 | int expectedConditions[][] = new int[][] { { 2, 4, 8 }, { 1, 4, 8 }, 28 | { 1, 2, 8 }, { 1, 2, 4 } }; 29 | 30 | testSubsetCreator(4, new OneOne(), expectedSegments, expectedConditions); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OneSetTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class OneSetTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 1, 2, 4, 8 }; 27 | int expectedConditions[][] = new int[][] { { 15 }, { 15 }, { 15 }, 28 | { 15 } }; 29 | 30 | testSubsetCreator(4, new OneSet(), expectedSegments, expectedConditions); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OneOneAndSelfTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class OneOneAndSelfTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 1, 2, 4, 8 }; 27 | int expectedConditions[][] = new int[][] { { 1, 2, 4, 8 }, { 1, 2, 4, 8 }, { 1, 2, 4, 8 }, { 1, 2, 4, 8 } }; 28 | 29 | testSubsetCreator(4, new OneOneAndSelf(), expectedSegments, expectedConditions); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OnePrecedingTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class OnePrecedingTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 1, 2, 4, 8 }; 27 | int expectedConditions[][] = new int[][] { {}, { 1 }, { 1, 2 }, 28 | { 1, 2, 4 } }; 29 | 30 | testSubsetCreator(4, new OnePreceding(), expectedSegments, 31 | expectedConditions); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OneSubsequentTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.junit.Test; 21 | 22 | public class OneSubsequentTest extends AbstractSegmentatorTest { 23 | 24 | @Test 25 | public void testWordSetLength4() { 26 | int expectedSegments[] = new int[] { 1, 2, 4, 8 }; 27 | int expectedConditions[][] = new int[][] { { 2, 4, 8 }, { 4, 8 }, { 8 }, 28 | {} }; 29 | 30 | testSubsetCreator(4, new OneSucceeding(), expectedSegments, 31 | expectedConditions); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/data/SubsetVectors.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.data; 19 | 20 | /** 21 | * This class contains the segmentations, probabilities and context vectors for a word set. 
22 | * 23 | * @author m.roeder 24 | * 25 | */ 26 | public class SubsetVectors extends SubsetProbabilities { 27 | 28 | public double vectors[][]; 29 | 30 | public SubsetVectors(int[] segments, int[][] conditions, double[][] vectors, double[] segmentProbabilities) { 31 | super(segments, conditions, segmentProbabilities); 32 | this.vectors = vectors; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /webApp/tomcat-users.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 14 | 21 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/FrequencyDeterminer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob; 19 | 20 | import org.aksw.palmetto.data.CountedSubsets; 21 | import org.aksw.palmetto.data.SegmentationDefinition; 22 | 23 | /** 24 | * Determines the frequencies of words and word sub sets of a given word set. 
25 | * 26 | * @author m.roeder 27 | * 28 | */ 29 | public interface FrequencyDeterminer { 30 | 31 | /** 32 | * Returns the frequencies of words and word sub sets of the given word sets. 33 | * 34 | * @param wordsets 35 | * @param definitions 36 | * @return 37 | */ 38 | public CountedSubsets[] determineCounts(String wordsets[][], 39 | SegmentationDefinition definitions[]); 40 | } 41 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/weight/EqualWeighter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.weight; 19 | 20 | import java.util.Arrays; 21 | 22 | import org.aksw.palmetto.data.SubsetProbabilities; 23 | 24 | @Deprecated 25 | public class EqualWeighter implements Weighter { 26 | 27 | @Override 28 | public double[] createWeights(SubsetProbabilities probabilities) { 29 | int pos = 0; 30 | for (int i = 0; i < probabilities.segments.length; ++i) { 31 | pos += probabilities.conditions[i].length; 32 | } 33 | double weights[] = new double[pos]; 34 | Arrays.fill(weights, 1.0); 35 | return weights; 36 | } 37 | 38 | @Override 39 | public String getName() { 40 | return "E_e"; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/decorator/AbstractFrequencyDeterminerDecorator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.prob.decorator; 19 | 20 | import org.aksw.palmetto.prob.FrequencyDeterminer; 21 | 22 | public abstract class AbstractFrequencyDeterminerDecorator implements FrequencyDeterminerDecorator { 23 | 24 | protected FrequencyDeterminer determiner; 25 | 26 | public AbstractFrequencyDeterminerDecorator(FrequencyDeterminer determiner) { 27 | this.determiner = determiner; 28 | } 29 | 30 | @Override 31 | public FrequencyDeterminer getDeterminer() { 32 | return determiner; 33 | } 34 | 35 | @Override 36 | public void setDeterminer(FrequencyDeterminer determiner) { 37 | this.determiner = determiner; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/corpus/lucene/SlidingWindowCounter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.corpus.lucene; 19 | 20 | import com.carrotsearch.hppc.IntArrayList; 21 | 22 | @Deprecated 23 | public interface SlidingWindowCounter { 24 | 25 | public void setWindowSize(int windowSize); 26 | 27 | public int determineCount(IntArrayList[] positions); 28 | 29 | /** 30 | * This method returns the sum of word sets which would be counted if one would go over the complete corpus using 31 | * the sliding window. Note that if the given word set length is 1 this method must return the sum of all 32 | * terms inside the corpus. 33 | * 34 | * @param wordSetLength 35 | * @return 36 | */ 37 | public long getWordSetCountSum(int wordSetLength); 38 | } 39 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/Coherence.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto; 19 | 20 | /** 21 | * A coherence measure calculates the coherence of a given set of top word sets. 
22 | * 23 | * @author Michael Röder 24 | * 25 | */ 26 | public interface Coherence { 27 | 28 | /** 29 | * Calculates the coherence for the given set of top word sets. 30 | * 31 | * @param wordsets 32 | * set of topic top words 33 | * @return a double array containing the coherences for the given top word 34 | * sets. 35 | */ 36 | public double[] calculateCoherences(String[][] wordsets); 37 | 38 | /** 39 | * Returns the name of the coherence. 40 | * 41 | * @return the name of the coherence 42 | */ 43 | public String getName(); 44 | } 45 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/direct/DirectConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import org.aksw.palmetto.calculations.ConfirmationMeasure; 21 | import org.aksw.palmetto.data.SubsetProbabilities; 22 | 23 | /** 24 | * This interface is implemented by all confirmation measures which are using 25 | * the probabilities directly. 
26 | * 27 | * @author Michael Röder 28 | * 29 | */ 30 | public interface DirectConfirmationMeasure extends ConfirmationMeasure { 31 | 32 | /** 33 | * Calculates the confirmation values for the given subset probabilities. 34 | * 35 | * @param subsetProbabilities 36 | * subset probabilities used for the calculation 37 | * @return confirmation values 38 | */ 39 | public abstract double[] calculateConfirmationValues(SubsetProbabilities subsetProbabilities); 40 | } 41 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/MaxTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.Max; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class MaxTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 3 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 9 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, 1 }, { new double[] { -1, 1 }, 1 } }); 36 | } 37 | 38 | public MaxTest(double[] values, double expectedSum) { 39 | super(new Max(), values, expectedSum); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/MinTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.Min; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class MinTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 1 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 0 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, -1 }, { new double[] { -1, 1 }, -1 } }); 36 | } 37 | 38 | public MinTest(double[] values, double expectedSum) { 39 | super(new Min(), values, expectedSum); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /webApp/src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | Palmetto Web Application 7 | 8 | 9 | encodingFilter 10 | org.springframework.web.filter.CharacterEncodingFilter 11 | 12 | encoding 13 | UTF-8 14 | 15 | 16 | forceEncoding 17 | true 18 | 19 | 20 | 21 | encodingFilter 22 | /* 23 | 24 | 25 | 26 | index.html 27 | 28 | 29 | dispatcher 30 | org.springframework.web.servlet.DispatcherServlet 31 | 32 | contextClass 33 | 34 | org.springframework.web.context.support.AnnotationConfigWebApplicationContext 35 | 36 | 37 | 38 | contextConfigLocation 39 | org.aksw.palmetto.webapp.config.RootConfig 40 | 41 | 1 42 | 43 | 44 | 45 | dispatcher 46 | /service/* 47 | 48 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/MedianTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.Median; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class MedianTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 2 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 4.5 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, 0 }, { new double[] { -1, 1 }, 0 } }); 36 | } 37 | 38 | public MedianTest(double[] values, double expectedSum) { 39 | super(new Median(), values, expectedSum); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/indirect/CosinusConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | public class CosinusConfirmationMeasure extends AbstractVectorBasedCalculation { 21 | 22 | @Override 23 | public String getName() { 24 | return "m_cos"; 25 | } 26 | 27 | @Override 28 | protected double calculateSimilarity(double[] vector1, double[] vector2) { 29 | double length1 = 0, 30 | length2 = 0, 31 | sum = 0; 32 | for (int i = 0; i < vector1.length; ++i) { 33 | sum += vector1[i] * vector2[i]; 34 | length1 += Math.pow(vector1[i], 2); 35 | length2 += Math.pow(vector2[i], 2); 36 | } 37 | if ((length1 > 0) && (length2 > 0)) { 38 | return sum / (Math.sqrt(length1) * Math.sqrt(length2)); 39 | } else { 40 | return 0; 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/indirect/DiceConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | public class DiceConfirmationMeasure extends AbstractVectorBasedCalculation { 21 | 22 | @Override 23 | public String getName() { 24 | return "m_dice"; 25 | } 26 | 27 | @Override 28 | protected double calculateSimilarity(double[] vector1, double[] vector2) { 29 | double minSum = 0; 30 | double sum = 0; 31 | 32 | for (int i = 0; i < vector1.length; ++i) { 33 | minSum += vector1[i] < vector2[i] ? vector1[i] : vector2[i]; 34 | sum += vector1[i] + vector2[i]; 35 | } 36 | 37 | if (sum > 0) { 38 | return 2 * minSum / sum; 39 | } else { 40 | // Both vectors have the length 0 41 | // so both vectors are exactly the zero vector 42 | return 1; 43 | } 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/HarmonicMeanTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.HarmonicMean; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class HarmonicMeanTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 1.636363637 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 0 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, 0 }, { new double[] { -1, 1 }, 0 } }); 36 | } 37 | 38 | public HarmonicMeanTest(double[] values, double expectedSum) { 39 | super(new HarmonicMean(), values, expectedSum); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/corpus/lucene/creation/IndexableDocument.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
/**
 * A simple structure containing the text of a document and its number of
 * tokens. Both properties are mutable and are stored without validation.
 *
 * @author Michael Röder (roeder@informatik.uni-leipzig.de)
 *
 */
public class IndexableDocument {

    /** The text of the document. */
    private String text;

    /** The number of tokens of the document. */
    private int numberOfTokens;

    /**
     * Creates a document from its text and its token count.
     *
     * @param text           the text of the document
     * @param numberOfTokens the number of tokens of the document
     */
    public IndexableDocument(String text, int numberOfTokens) {
        this.numberOfTokens = numberOfTokens;
        this.text = text;
    }

    /**
     * @return the text of the document
     */
    public String getText() {
        return this.text;
    }

    /**
     * @param text the new text of the document
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * @return the number of tokens of the document
     */
    public int getNumberOfTokens() {
        return this.numberOfTokens;
    }

    /**
     * @param numberOfTokens the new number of tokens of the document
     */
    public void setNumberOfTokens(int numberOfTokens) {
        this.numberOfTokens = numberOfTokens;
    }
}
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.ArithmeticMean; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class ArithmeticMeanTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 2 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 4.5 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, 0 }, { new double[] { -1, 1 }, 0 } }); 36 | } 37 | 38 | public ArithmeticMeanTest(double[] values, double expectedSum) { 39 | super(new ArithmeticMean(), values, expectedSum); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/GeometricMeanTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.GeometricMean; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class GeometricMeanTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 1.817120593 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 0 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, 0 }, { new double[] { -1, 1 }, 0 } }); 36 | } 37 | 38 | public GeometricMeanTest(double[] values, double expectedSum) { 39 | super(new GeometricMean(), values, expectedSum); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/AbstractSummarizationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | 22 | import org.aksw.palmetto.aggregation.Aggregation; 23 | import org.junit.Assert; 24 | import org.junit.Test; 25 | 26 | public abstract class AbstractSummarizationTest { 27 | 28 | private static final double DOUBLE_PRECISION_DELTA = 0.00000001; 29 | 30 | private Aggregation summarizer; 31 | private double values[]; 32 | private double expectedSum; 33 | 34 | public AbstractSummarizationTest(Aggregation summarizer, double[] values, double expectedSum) { 35 | this.summarizer = summarizer; 36 | this.values = values; 37 | this.expectedSum = expectedSum; 38 | } 39 | 40 | @Test 41 | public void test() { 42 | double weights[] = new double[values.length]; 43 | Arrays.fill(weights, 1.0); 44 | Assert.assertEquals(expectedSum, summarizer.summarize(values, weights), DOUBLE_PRECISION_DELTA); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/sum/QuadraticMeanTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.sum; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.aggregation.QuadraticMean; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class QuadraticMeanTest extends AbstractSummarizationTest { 30 | 31 | @Parameters 32 | public static Collection data() { 33 | return Arrays.asList(new Object[][] { { new double[] { 1, 2, 3 }, 2.1602468995 }, 34 | { new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 5.338539126 }, 35 | { new double[] { 0 }, 0 }, { new double[] { -1, 0, 1 }, 0.8164965809 }, 36 | { new double[] { -1, 1 }, 1 } }); 37 | } 38 | 39 | public QuadraticMeanTest(double[] values, double expectedSum) { 40 | super(new QuadraticMean(), values, expectedSum); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/weight/ConditionalProbabilityBasedWeighter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for 
topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.weight; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | @Deprecated 23 | public class ConditionalProbabilityBasedWeighter implements Weighter { 24 | 25 | @Override 26 | public double[] createWeights(SubsetProbabilities probabilities) { 27 | int pos = 0; 28 | for (int i = 0; i < probabilities.segments.length; ++i) { 29 | pos += probabilities.conditions[i].length; 30 | } 31 | double weights[] = new double[pos]; 32 | 33 | pos = 0; 34 | for (int i = 0; i < probabilities.segments.length; ++i) { 35 | for (int j = 0; j < probabilities.conditions[i].length; ++j) { 36 | weights[pos] = probabilities.probabilities[probabilities.conditions[i][j]]; 37 | ++pos; 38 | } 39 | } 40 | 41 | return weights; 42 | } 43 | 44 | @Override 45 | public String getName() { 46 | return "E_c"; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/calculations/indirect/AbstractVectorBasedCalculationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | import org.junit.Assert; 21 | import org.junit.Test; 22 | 23 | public abstract class AbstractVectorBasedCalculationTest { 24 | 25 | private static final double DOUBLE_PRECISION_DELTA = 0.00000001; 26 | 27 | private AbstractVectorBasedCalculation calculation; 28 | private double vector1[]; 29 | private double vector2[]; 30 | private double expectedResult; 31 | 32 | public AbstractVectorBasedCalculationTest(AbstractVectorBasedCalculation calculation, double[] vector1, 33 | double[] vector2, double expectedResult) { 34 | this.calculation = calculation; 35 | this.vector1 = vector1; 36 | this.vector2 = vector2; 37 | this.expectedResult = expectedResult; 38 | } 39 | 40 | @Test 41 | public void test() { 42 | Assert.assertEquals(expectedResult, calculation.calculateSimilarity(vector1, vector2), DOUBLE_PRECISION_DELTA); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/indirect/JaccardConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool 
for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | public class JaccardConfirmationMeasure extends AbstractVectorBasedCalculation { 21 | 22 | @Override 23 | public String getName() { 24 | return "m_jac"; 25 | } 26 | 27 | @Override 28 | protected double calculateSimilarity(double[] vector1, double[] vector2) { 29 | double minSum = 0, 30 | maxSum = 0; 31 | for (int i = 0; i < vector1.length; ++i) { 32 | if (vector1[i] < vector2[i]) { 33 | minSum += vector1[i]; 34 | maxSum += vector2[i]; 35 | } else { 36 | minSum += vector2[i]; 37 | maxSum += vector1[i]; 38 | } 39 | } 40 | if (maxSum > 0) { 41 | return minSum / maxSum; 42 | } else { 43 | // Both vectors have the length 0 44 | // so both vectors are exactly the zero vector 45 | return 1; 46 | } 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/vector/AbstractProbCalcBasedVectorCreatorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.vector; 19 | 20 | import org.aksw.palmetto.calculations.direct.DirectConfirmationMeasure; 21 | import org.aksw.palmetto.subsets.OneOne; 22 | import org.junit.Assert; 23 | import org.junit.Test; 24 | 25 | public abstract class AbstractProbCalcBasedVectorCreatorTest extends AbstractVectorCreatorTest { 26 | 27 | protected String expectedCreatorName; 28 | 29 | public AbstractProbCalcBasedVectorCreatorTest(DirectConfirmationMeasure calculation, int wordsetSize, 30 | double[][] probabilities, double[][][] expectedVectors, String expectedCreatorName, double gamma) { 31 | super(new DirectConfirmationBasedVectorCreator(null, calculation, gamma), new OneOne(), wordsetSize, probabilities, 32 | expectedVectors); 33 | this.expectedCreatorName = expectedCreatorName; 34 | } 35 | 36 | @Test 37 | public void testCreatorName() { 38 | Assert.assertEquals(expectedCreatorName, this.vectorCreator.getVectorCreatorName()); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/SetSet.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - 
Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | /** 25 | * Special {@link Segmentator} used to create Olson's coherence measure. 26 | * 27 | * @author Michael Röder (michael.roeder@uni-paderborn.de) 28 | * 29 | */ 30 | public class SetSet implements Segmentator { 31 | 32 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 33 | /* 34 | * Code the combinations of elements not with ids but with bits. 01 is 35 | * only the first element, 10 is the second and 11 is the combination of 36 | * both. 
37 | */ 38 | int mask = (1 << wordsetSize) - 1; 39 | BitSet neededCounts = new BitSet(1 << wordsetSize); 40 | neededCounts.set(mask); 41 | return new SegmentationDefinition(new int[] { mask }, new int[][] {{ mask }}, neededCounts); 42 | } 43 | 44 | @Override 45 | public String getName() { 46 | return "S^{set}_{set}"; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/weight/MarginalProbabilityBasedWeighter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.weight; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | @Deprecated 23 | public class MarginalProbabilityBasedWeighter implements Weighter { 24 | 25 | @Override 26 | public double[] createWeights(SubsetProbabilities probabilities) { 27 | int pos = 0; 28 | for (int i = 0; i < probabilities.segments.length; ++i) { 29 | pos += probabilities.conditions[i].length; 30 | } 31 | double weights[] = new double[pos]; 32 | 33 | double segmentProbability; 34 | pos = 0; 35 | for (int i = 0; i < probabilities.segments.length; ++i) { 36 | segmentProbability = probabilities.probabilities[probabilities.segments[i]]; 37 | for (int j = 0; j < probabilities.conditions[i].length; ++j) { 38 | weights[pos] = segmentProbability; 39 | ++pos; 40 | } 41 | } 42 | 43 | return weights; 44 | } 45 | 46 | @Override 47 | public String getName() { 48 | return "E_m"; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/OneAll.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | public class OneAll implements Segmentator { 25 | 26 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 27 | /* 28 | * Code the combinations of elements not with ids but with bits. 01 is 29 | * only the first element, 10 is the second and 11 is the combination of 30 | * both. 31 | */ 32 | int conditions[][] = new int[wordsetSize][1]; 33 | int segments[] = new int[wordsetSize]; 34 | int bit = 1, 35 | pos = 0; 36 | int mask = (1 << wordsetSize) - 1; 37 | BitSet neededCounts = new BitSet(1 << wordsetSize); 38 | while (bit < mask) { 39 | segments[pos] = bit; 40 | neededCounts.set(bit); 41 | conditions[pos] = new int[] { mask - bit }; 42 | bit = bit << 1; 43 | ++pos; 44 | } 45 | neededCounts.set(mask); 46 | return new SegmentationDefinition(segments, conditions, neededCounts); 47 | } 48 | 49 | @Override 50 | public String getName() { 51 | return "S^{one}_{all}"; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/OneSet.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | public class OneSet implements Segmentator { 25 | 26 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 27 | /* 28 | * Code the combinations of elements not with ids but with bits. 01 is 29 | * only the first element, 10 is the second and 11 is the combination of 30 | * both. 31 | */ 32 | int conditions[][] = new int[wordsetSize][1]; 33 | int segments[] = new int[wordsetSize]; 34 | int bit = 1; 35 | int pos = 0; 36 | int mask = (1 << wordsetSize) - 1; 37 | BitSet neededCounts = new BitSet(1 << wordsetSize); 38 | while (bit < mask) { 39 | segments[pos] = bit; 40 | neededCounts.set(bit); 41 | conditions[pos] = new int[] { mask }; 42 | bit = bit << 1; 43 | ++pos; 44 | } 45 | neededCounts.set(mask); 46 | return new SegmentationDefinition(segments, conditions, neededCounts); 47 | } 48 | 49 | @Override 50 | public String getName() { 51 | return "S^{one}_{set}"; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/ArithmeticMean.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | public class ArithmeticMean implements Aggregation { 21 | 22 | @Override 23 | public double summarize(double[] values) { 24 | double sum = 0; 25 | for (int i = 0; i < values.length; ++i) { 26 | sum += values[i]; 27 | } 28 | return sum / (double) values.length; 29 | } 30 | 31 | @Override 32 | public String getName() { 33 | return "sigma_a"; 34 | } 35 | 36 | @Override 37 | public String toString() { 38 | return getName(); 39 | } 40 | 41 | @Override 42 | public double summarize(double[] values, double[] weights) { 43 | double weightSum = 0, 44 | sum = 0; 45 | for (int i = 0; i < values.length; ++i) { 46 | if (!Double.isNaN(values[i])) { 47 | sum += weights[i] * values[i]; 48 | weightSum += weights[i]; 49 | } 50 | } 51 | if (weightSum > 0) { 52 | return sum / weightSum; 53 | } else { 54 | return 0; 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/Min.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | public class Min implements Aggregation { 21 | 22 | @Override 23 | public double summarize(double[] values) { 24 | double min = Double.POSITIVE_INFINITY; 25 | for (int i = 0; i < values.length; ++i) { 26 | if (values[i] < min) { 27 | min = values[i]; 28 | } 29 | } 30 | return min; 31 | } 32 | 33 | @Override 34 | public String getName() { 35 | return "sigma_n"; 36 | } 37 | 38 | @Override 39 | public double summarize(double[] values, double[] weights) { 40 | double value, 41 | min = Double.POSITIVE_INFINITY; 42 | for (int i = 0; i < values.length; ++i) { 43 | if (!Double.isNaN(values[i])) { 44 | value = values[i] * weights[i]; 45 | if (value < min) { 46 | min = value; 47 | } 48 | } 49 | } 50 | if (Double.isInfinite(min)) { 51 | return RETURN_VALUE_FOR_UNDEFINED; 52 | } else { 53 | return min; 54 | } 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/weight/CompleteProbabilityBasedWeighter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.weight; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | @Deprecated 23 | public class CompleteProbabilityBasedWeighter implements Weighter { 24 | 25 | @Override 26 | public double[] createWeights(SubsetProbabilities probabilities) { 27 | int pos = 0; 28 | for (int i = 0; i < probabilities.segments.length; ++i) { 29 | pos += probabilities.conditions[i].length; 30 | } 31 | double weights[] = new double[pos]; 32 | 33 | double segmentProbability; 34 | pos = 0; 35 | for (int i = 0; i < probabilities.segments.length; ++i) { 36 | segmentProbability = probabilities.probabilities[probabilities.segments[i]]; 37 | for (int j = 0; j < probabilities.conditions[i].length; ++j) { 38 | weights[pos] = segmentProbability + probabilities.probabilities[probabilities.conditions[i][j]]; 39 | ++pos; 40 | } 41 | } 42 | 43 | return weights; 44 | } 45 | 46 | @Override 47 | public String getName() { 48 | return "E_s"; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/Max.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | public class Max implements Aggregation { 21 | 22 | @Override 23 | public double summarize(double[] values) { 24 | double max = Double.NEGATIVE_INFINITY; 25 | for (int i = 0; i < values.length; ++i) { 26 | if (values[i] > max) { 27 | max = values[i]; 28 | } 29 | } 30 | return max; 31 | } 32 | 33 | @Override 34 | public String getName() { 35 | return "sigma_x"; 36 | } 37 | 38 | @Override 39 | public double summarize(double[] values, double[] weights) { 40 | double value; 41 | double max = Double.NEGATIVE_INFINITY; 42 | 43 | for (int i = 0; i < values.length; ++i) { 44 | if (!Double.isNaN(values[i])) { 45 | value = values[i] * weights[i]; 46 | if (value > max) { 47 | max = value; 48 | } 49 | } 50 | } 51 | if (Double.isInfinite(max)) { 52 | return RETURN_VALUE_FOR_UNDEFINED; 53 | } else { 54 | return max; 55 | } 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/AllAll.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | /** 25 | * 26 | * @author Michael Röder (michael.roeder@uni-paderborn.de) 27 | * 28 | * @deprecated since it does not follow the naming convention. Use SetSet instead. 29 | */ 30 | @Deprecated 31 | public class AllAll implements Segmentator { 32 | 33 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 34 | /* 35 | * Code the combinations of elements not with ids but with bits. 01 is 36 | * only the first element, 10 is the second and 11 is the combination of 37 | * both. 
38 | */ 39 | int mask = (1 << wordsetSize) - 1; 40 | int conditions[][] = new int[][] { { mask } }; 41 | int segments[] = new int[] { mask }; 42 | BitSet neededCounts = new BitSet(1 << wordsetSize); 43 | neededCounts.set(mask); 44 | return new SegmentationDefinition(segments, conditions, neededCounts); 45 | } 46 | 47 | @Override 48 | public String getName() { 49 | return "S^{all}_{all}"; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/QuadraticMean.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | public class QuadraticMean implements Aggregation { 21 | 22 | @Override 23 | public double summarize(double[] values) { 24 | double sum = 0; 25 | for (int i = 0; i < values.length; ++i) { 26 | sum += Math.pow(values[i], 2); 27 | } 28 | return Math.sqrt(sum / values.length); 29 | } 30 | 31 | @Override 32 | public String getName() { 33 | return "sigma_q"; 34 | } 35 | 36 | @Override 37 | public String toString() { 38 | return getName(); 39 | } 40 | 41 | @Override 42 | public double summarize(double[] values, double[] weights) { 43 | double sum = 0; 44 | double weightSum = 0; 45 | 46 | // int count = 0; 47 | for (int i = 0; i < values.length; ++i) { 48 | if (!Double.isNaN(values[i])) { 49 | sum += weights[i] * Math.pow(values[i], 2); 50 | weightSum += weights[i]; 51 | // ++count; 52 | } 53 | } 54 | if (weightSum > 0) { 55 | return Math.sqrt(sum / weightSum); 56 | } else { 57 | return 0; 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /.github/workflows/maven.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 3 | 4 | name: Maven Build 5 | 6 | on: 7 | push: 8 | branches: [ master, develop ] 9 | pull_request: 10 | branches: [ master, develop ] 11 | 12 | jobs: 13 | test-lib: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up JDK 11 18 | uses: actions/setup-java@v2 19 | with: 20 | java-version: '11' 21 | distribution: 'adopt' 22 | - name: Cache Maven packages 23 | uses: actions/cache@v2 24 | with: 25 | path: ~/.m2 26 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} 27 | restore-keys: ${{ runner.os }}-m2 28 | - name: Compile project 29 | run: mvn clean compile -DskipTests=true 
-Dmaven.javadoc.skip=true -B -V 30 | working-directory: palmetto 31 | - name: Run tests 32 | run: mvn jacoco:prepare-agent package -Dmaven.javadoc.skip=true -B jacoco:report 33 | working-directory: palmetto 34 | - name: Upload test results to Codacy 35 | run: bash <(curl -Ls https://coverage.codacy.com/get.sh) report -r palmetto/target/site/jacoco/jacoco.xml 36 | env: 37 | CODACY_PROJECT_TOKEN: ${{ secrets.CODACY_PROJECT_TOKEN }} 38 | build-demo: 39 | needs: test-lib 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v2 43 | - name: Set up JDK 11 44 | uses: actions/setup-java@v2 45 | with: 46 | java-version: '11' 47 | distribution: 'adopt' 48 | - name: Cache Maven packages 49 | uses: actions/cache@v2 50 | with: 51 | path: ~/.m2 52 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} 53 | restore-keys: ${{ runner.os }}-m2 54 | - name: install library 55 | run: mvn clean install -DskipTests=true -Dmaven.javadoc.skip=true 56 | working-directory: palmetto 57 | - name: Compile project 58 | run: mvn clean compile -DskipTests=true -Dmaven.javadoc.skip=true -B -V 59 | working-directory: webApp 60 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/corpus/WindowSupportingAdapter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.corpus; 19 | 20 | import com.carrotsearch.hppc.IntArrayList; 21 | import com.carrotsearch.hppc.IntIntOpenHashMap; 22 | import com.carrotsearch.hppc.IntObjectOpenHashMap; 23 | 24 | /** 25 | * This adapter supports window based probability estimation methods. 26 | * 27 | * @author m.roeder 28 | * 29 | */ 30 | public interface WindowSupportingAdapter extends CorpusAdapter { 31 | 32 | /** 33 | * Returns the histogram of the document sizes of the corpus. 34 | * 35 | * @return the histogram of the document sizes 36 | */ 37 | public int[][] getDocumentSizeHistogram(); 38 | 39 | /** 40 | * Returns the positions of the given words inside the corpus. 41 | * 42 | * @param words 43 | * the words for which the positions inside the documents should 44 | * be determined 45 | * @param docLengths 46 | * empty int int map in which the document lengths and counts are 47 | * inserted 48 | * @return the positions of the given words inside the corpus 49 | */ 50 | public IntObjectOpenHashMap requestWordPositionsInDocuments(String words[], 51 | IntIntOpenHashMap docLengths); 52 | } 53 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/AllOne.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | public class AllOne implements Segmentator { 25 | 26 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 27 | /* 28 | * Code the combinations of elements not with ids but with bits. 01 is 29 | * only the first element, 10 is the second and 11 is the combination of 30 | * both. 
31 | */ 32 | int conditions[][] = new int[wordsetSize][1]; 33 | int segments[] = new int[wordsetSize]; 34 | int bit = 1, 35 | pos = 0; 36 | int mask = (1 << wordsetSize) - 1; 37 | BitSet neededCounts = new BitSet(1 << wordsetSize); 38 | while (bit < mask) { 39 | segments[pos] = mask - bit; 40 | neededCounts.set(segments[pos]); 41 | conditions[pos] = new int[] { bit }; 42 | bit = bit << 1; 43 | ++pos; 44 | } 45 | neededCounts.set(mask); 46 | return new SegmentationDefinition(segments, conditions, neededCounts); 47 | } 48 | 49 | @Override 50 | public String getName() { 51 | return "S^{all}_{one}"; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/weight/WordSetSizeBasedWeighter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.weight; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | @Deprecated 23 | public class WordSetSizeBasedWeighter implements Weighter { 24 | 25 | @Override 26 | public double[] createWeights(SubsetProbabilities probabilities) { 27 | // get the number of words the complete word set comprises of 28 | int numberOfWords = Integer.numberOfTrailingZeros(probabilities.probabilities.length); 29 | int pos = 0; 30 | for (int i = 0; i < probabilities.segments.length; ++i) { 31 | pos += probabilities.conditions[i].length; 32 | } 33 | double weights[] = new double[pos]; 34 | 35 | pos = 0; 36 | for (int i = 0; i < probabilities.segments.length; ++i) { 37 | for (int j = 0; j < probabilities.conditions[i].length; ++j) { 38 | weights[pos] = ((numberOfWords - Integer.bitCount(probabilities.segments[i] 39 | | probabilities.conditions[i][j])) + 2.0) 40 | / numberOfWords; 41 | ++pos; 42 | } 43 | } 44 | 45 | return weights; 46 | } 47 | 48 | @Override 49 | public String getName() { 50 | return "E_l"; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/Aggregation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * An aggregation reduces a list of confirmation values to a single coherence
 * value.
 *
 * @author m.roeder
 *
 */
public interface Aggregation {

    /**
     * Value that is returned if the aggregation of the given values is not
     * defined.
     */
    double RETURN_VALUE_FOR_UNDEFINED = 0;

    /**
     * Reduces the given confirmation values to a single coherence value.
     *
     * @param values
     *            values that should be aggregated
     * @return aggregated value
     */
    double summarize(double[] values);

    /**
     * Reduces the product of the given confirmation values and the given
     * weights to a single coherence value.
     *
     * @param values
     *            values that should be aggregated
     * @param weights
     *            weights of the single values
     * @return aggregated value
     */
    double summarize(double[] values, double[] weights);

    /**
     * Gives the name of the aggregation.
     *
     * @return name of the aggregation
     */
    String getName();
}
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | public class OnePreceding implements Segmentator { 25 | 26 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 27 | /* 28 | * Code the combinations of elements not with ids but with bits. 01 is 29 | * only the first element, 10 is the second and 11 is the combination of 30 | * both. 
31 | */ 32 | int conditions[][] = new int[wordsetSize][]; 33 | int segments[] = new int[wordsetSize]; 34 | int bit = 1, 35 | pos = 0; 36 | int mask = (1 << wordsetSize) - 1; 37 | BitSet neededCounts = new BitSet(1 << wordsetSize); 38 | while (bit < mask) { 39 | segments[pos] = bit; 40 | neededCounts.set(bit); 41 | conditions[pos] = new int[pos]; 42 | for (int i = 0; i < pos; ++i) { 43 | neededCounts.set(bit + (1 << i)); 44 | conditions[pos][i] = 1 << i; 45 | } 46 | bit = bit << 1; 47 | ++pos; 48 | } 49 | return new SegmentationDefinition(segments, conditions, neededCounts); 50 | } 51 | 52 | @Override 53 | public String getName() { 54 | return "S^{one}_{pre}"; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/io/SimpleDocumentTextSupplier.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.io; 19 | 20 | import java.io.File; 21 | import java.util.Scanner; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | @Deprecated 27 | public class SimpleDocumentTextSupplier implements DocumentTextSupplier { 28 | private Scanner scanner; 29 | 30 | private static final Logger LOGGER = LoggerFactory.getLogger(SimpleDocumentTextSupplier.class); 31 | 32 | public static SimpleDocumentTextSupplier create(File inputFile) { 33 | Scanner scanner = null; 34 | SimpleDocumentTextSupplier supplier = null; 35 | try { 36 | scanner = new Scanner(inputFile); 37 | supplier = new SimpleDocumentTextSupplier(scanner); 38 | } catch (Exception e) { 39 | LOGGER.error("Couldn't open input file. Returning null.", e); 40 | } 41 | return supplier; 42 | } 43 | 44 | private SimpleDocumentTextSupplier(Scanner scanner) { 45 | this.scanner = scanner; 46 | } 47 | 48 | public String getNextDocumentText() { 49 | String text = null; 50 | if (scanner != null) { 51 | if (scanner.hasNext()) { 52 | text = scanner.nextLine(); 53 | } else { 54 | scanner.close(); 55 | scanner = null; 56 | } 57 | } 58 | return text; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/direct/JointProbabilityConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | /** 23 | * This confirmation measure simply returns the joint probability of W' and W* 24 | * as value. result = P(W',W*) 25 | * 26 | * @author Michael Röder 27 | * 28 | */ 29 | public class JointProbabilityConfirmationMeasure implements DirectConfirmationMeasure { 30 | 31 | public JointProbabilityConfirmationMeasure() { 32 | super(); 33 | } 34 | 35 | @Override 36 | public double[] calculateConfirmationValues(SubsetProbabilities subsetProbabilities) { 37 | int pos = 0; 38 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 39 | pos += subsetProbabilities.conditions[i].length; 40 | } 41 | double values[] = new double[pos]; 42 | pos = 0; 43 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 44 | for (int j = 0; j < subsetProbabilities.conditions[i].length; ++j) { 45 | values[pos] = subsetProbabilities.probabilities[subsetProbabilities.segments[i] 46 | | subsetProbabilities.conditions[i][j]]; 47 | ++pos; 48 | } 49 | } 50 | return values; 51 | } 52 | 53 | @Override 54 | public String getName() { 55 | return "m_P"; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/OneSucceeding.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | public class OneSucceeding implements Segmentator { 25 | 26 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 27 | /* 28 | * Code the combinations of elements not with ids but with bits. 01 is 29 | * only the first element, 10 is the second and 11 is the combination of 30 | * both. 
31 | */ 32 | int conditions[][] = new int[wordsetSize][]; 33 | int segments[] = new int[wordsetSize]; 34 | int bit = 1; 35 | int pos = 0; 36 | int mask = (1 << wordsetSize) - 1; 37 | BitSet neededCounts = new BitSet(1 << wordsetSize); 38 | while (bit < mask) { 39 | segments[pos] = bit; 40 | neededCounts.set(bit); 41 | conditions[pos] = new int[wordsetSize - (pos + 1)]; 42 | for (int i = pos + 1; i < wordsetSize; ++i) { 43 | neededCounts.set(bit + (1 << i)); 44 | conditions[pos][i - (pos + 1)] = 1 << i; 45 | } 46 | bit = bit << 1; 47 | ++pos; 48 | } 49 | return new SegmentationDefinition(segments, conditions, neededCounts); 50 | } 51 | 52 | @Override 53 | public String getName() { 54 | return "S^{one}_{suc}"; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/io/GoldStandardReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.io; 19 | 20 | import java.io.File; 21 | import java.io.IOException; 22 | import java.nio.charset.StandardCharsets; 23 | import java.util.List; 24 | 25 | import org.apache.commons.io.FileUtils; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import com.carrotsearch.hppc.DoubleArrayList; 30 | 31 | public class GoldStandardReader { 32 | 33 | private static final Logger LOGGER = LoggerFactory.getLogger(GoldStandardReader.class); 34 | 35 | public static double[] readGoldStandard(String file) throws IOException { 36 | List lines = FileUtils.readLines(new File(file), StandardCharsets.UTF_8); 37 | 38 | DoubleArrayList ratings = new DoubleArrayList(); 39 | for (String line : lines) { 40 | try { 41 | ratings.add(Double.parseDouble(line)); 42 | } catch (NumberFormatException e) { 43 | throw new IOException("Error while reading gold standard.", e); 44 | } 45 | } 46 | return ratings.toArray(); 47 | } 48 | 49 | public static double[] readGoldStandardSavely(String file) { 50 | double ratings[] = null; 51 | try { 52 | ratings = readGoldStandard(file); 53 | } catch (IOException e) { 54 | LOGGER.error("Error while trying to read the gold standard. Returning null.", e); 55 | } 56 | return ratings; 57 | } 58 | } -------------------------------------------------------------------------------- /webApp/src/main/webapp/css/palmetto.css: -------------------------------------------------------------------------------- 1 | /* The html and body elements cannot have any padding or margin. 
/* The html and body elements cannot have any padding or margin. */
html,
body {
	height: 100%;
	width: 100%;
	margin: 0px;
}

body {
	float: none;
}

/*div#headline div {
	float:left;
}*/
div#title {
	margin-left: 10px;
}

div#demo_description {
	padding-bottom: 20px;
}

div#coherence_radio, div#descriptions {
	float: left;
}

div#coherences_config {
	min-height: 320px;
}

div#introduction, div#input_line {
	clear: both;
}

div#input_line {
	padding-bottom: 20px;
}

div.description {
	min-height: 132px;
}

label.radio_label {
	margin: 0px;
}

span.sub {
	vertical-align: sub;
	font-size: 11px;
}

span.math {
	font-family: 'Times New Roman', Times, serif;
	font-style: italic;
}

div#alert-row {
	padding-top: 10px;
	padding-bottom: 10px;
}

/* Wrapper for the page content that pushes the footer down. */
#wrap {
	min-height: 100%;
	/* NOTE(review): the auto/100% pair below looks like the legacy min-height
	   fallback for old browsers - confirm before removing it. */
	height: auto !important;
	height: 100%;
	/* Negative bottom margin: pull the footer up by its height. */
	margin: 0 auto -60px;
	/* A bottom padding had to be added since the positioning of the footer
	   does not seem to work as it should. */
	padding-bottom: 40px;
}

/* Spacing above the push element and the footer (this rule sets only a top
   padding, not a height). */
#push, #footer {
	padding-top: 40px;
}

/* Background colour of the footer. */
#footer {
	/* background-color: #f5f5f5; */
	background-color: #ffffff;
}

div#logos {
	display: table;
	table-layout: fixed;
	width: 100%;
	height: 60px;
}

/* Same height for all footer logos. */
div#logos > a > img {
	height: 60px;
	width: auto;
}

div#logos > a {
	display: table-cell;
	vertical-align: middle;
	float: none;
}

/* Lastly, apply responsive CSS fixes as necessary. */
@media (max-width : 767px) {
	#footer {
		margin-left: -20px;
		margin-right: -20px;
		padding-left: 20px;
		padding-right: 20px;
	}
}
17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | public class GeometricMean implements Aggregation { 21 | 22 | @Override 23 | public double summarize(double[] values) { 24 | double prod = 1; 25 | for (int i = 0; i < values.length; ++i) { 26 | if (values[i] <= 0) { 27 | // the geometric mean is not defined for negative numbers 28 | return 0; 29 | } 30 | prod *= values[i]; 31 | } 32 | return Math.pow(prod, 1.0 / values.length); 33 | } 34 | 35 | @Override 36 | public String getName() { 37 | return "sigma_g"; 38 | } 39 | 40 | @Override 41 | public String toString() { 42 | return getName(); 43 | } 44 | 45 | @Override 46 | public double summarize(double[] values, double[] weights) { 47 | double weightSum = 0, 48 | prod = 0; 49 | for (int i = 0; i < values.length; ++i) { 50 | if (!Double.isNaN(values[i])) { 51 | if (values[i] <= 0) { 52 | // the geometric mean is not defined for negative numbers 53 | return 0; 54 | } 55 | 56 | prod += weights[i] * Math.log(values[i]); 57 | weightSum += weights[i]; 58 | } 59 | } 60 | 61 | if (weightSum > 0) { 62 | return Math.exp(prod / weightSum); 63 | } else { 64 | return 0; 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/HarmonicMean.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | public class HarmonicMean implements Aggregation { 21 | 22 | @Override 23 | public double summarize(double[] values) { 24 | double sum = 0; 25 | for (int i = 0; i < values.length; ++i) { 26 | if (values[i] == 0) { 27 | // if one if the values is 0 the harmonic mean goes against 0 28 | return 0; 29 | } 30 | sum += 1.0 / values[i]; 31 | } 32 | if (sum == 0) { 33 | return 0; 34 | } else { 35 | return values.length / sum; 36 | } 37 | } 38 | 39 | @Override 40 | public String getName() { 41 | return "sigma_h"; 42 | } 43 | 44 | @Override 45 | public String toString() { 46 | return getName(); 47 | } 48 | 49 | @Override 50 | public double summarize(double[] values, double[] weights) { 51 | double weightSum = 0; 52 | double sum = 0; 53 | for (int i = 0; i < values.length; ++i) { 54 | if (!Double.isNaN(values[i])) { 55 | if (values[i] == 0) { 56 | // if one if the values is 0 the harmonic mean goes against 0 57 | return 0; 58 | } 59 | sum += weights[i] / values[i]; 60 | weightSum += weights[i]; 61 | } 62 | } 63 | if (sum == 0) { 64 | return 0; 65 | } else { 66 | return weightSum / sum; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/direct/AbstractUndefinedResultHandlingConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | /** 21 | * An abstract class of all confirmation measures which have to handle undefined 22 | * results. 23 | * 24 | * @author Michael Röder 25 | * 26 | */ 27 | public abstract class AbstractUndefinedResultHandlingConfirmationMeasure implements DirectConfirmationMeasure { 28 | 29 | /** 30 | * Default value for {@link #resultIfCalcUndefined} = {@value} . 31 | */ 32 | private static final double DEFAULT_RESULT_FOR_UNDIFEND_CALCULATIONS = 0; 33 | 34 | /** 35 | * Value which is returned if the calculation is not defined. 36 | */ 37 | protected double resultIfCalcUndefined; 38 | 39 | /** 40 | * Constructor. 41 | */ 42 | public AbstractUndefinedResultHandlingConfirmationMeasure() { 43 | resultIfCalcUndefined = DEFAULT_RESULT_FOR_UNDIFEND_CALCULATIONS; 44 | } 45 | 46 | /** 47 | * Constructor. 48 | * 49 | * @param resultIfCalcUndefined 50 | * value which should be returned if the calculation is not 51 | * defined. 
52 | */ 53 | public AbstractUndefinedResultHandlingConfirmationMeasure(double resultIfCalcUndefined) { 54 | this.resultIfCalcUndefined = resultIfCalcUndefined; 55 | } 56 | 57 | public double getResultIfCalcUndefined() { 58 | return resultIfCalcUndefined; 59 | } 60 | 61 | public void setResultIfCalcUndefined(double resultIfCalcUndefined) { 62 | this.resultIfCalcUndefined = resultIfCalcUndefined; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/OneOne.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | 22 | import com.carrotsearch.hppc.BitSet; 23 | 24 | public class OneOne implements Segmentator { 25 | 26 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 27 | /* 28 | * Code the combinations of elements not with ids but with bits. 01 is 29 | * only the first element, 10 is the second and 11 is the combination of 30 | * both. 
31 | */ 32 | int conditions[][] = new int[wordsetSize][wordsetSize - 1]; 33 | int segments[] = new int[wordsetSize]; 34 | int condBit, 35 | condPos, 36 | bit = 1, 37 | pos = 0; 38 | int mask = (1 << wordsetSize) - 1; 39 | BitSet neededCounts = new BitSet(1 << wordsetSize); 40 | while (bit < mask) { 41 | segments[pos] = bit; 42 | neededCounts.set(bit); 43 | condBit = 1; 44 | condPos = 0; 45 | while (condBit < mask) { 46 | if (condBit != bit) { 47 | neededCounts.set(bit + condBit); 48 | conditions[pos][condPos] = condBit; 49 | ++condPos; 50 | } 51 | condBit = condBit << 1; 52 | } 53 | bit = bit << 1; 54 | ++pos; 55 | } 56 | return new SegmentationDefinition(segments, conditions, neededCounts); 57 | } 58 | 59 | @Override 60 | public String getName() { 61 | return "S^{one}_{one}"; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/vector/VectorCreator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.vector; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | import org.aksw.palmetto.data.SubsetVectors; 22 | import org.aksw.palmetto.prob.ProbabilityEstimator; 23 | 24 | /** 25 | * Interface for the vector creation. 26 | * 27 | * @author m.roeder 28 | * 29 | */ 30 | public interface VectorCreator { 31 | 32 | /** 33 | * Creates vectors for the given word sets and their segmentations. 34 | * 35 | * @param wordsets 36 | * @param definitions 37 | * @return 38 | */ 39 | public SubsetVectors[] getVectors(String wordsets[][], SegmentationDefinition definitions[]); 40 | 41 | /** 42 | * Sets the probability estimator used by the vector creator. 43 | * 44 | * @param supplier 45 | */ 46 | public void setProbabilityEstimator(ProbabilityEstimator supplier); 47 | 48 | /** 49 | * Calls {@link ProbabilityEstimator#getName()} of the probability estimator and returns the 50 | * name of 51 | * the estimator. 52 | * 53 | * @return 54 | */ 55 | public String getProbabilityEstimatorName(); 56 | 57 | /** 58 | * Returns the name of the vector space. 59 | * 60 | * @return 61 | */ 62 | public String getVectorSpaceName(); 63 | 64 | /** 65 | * Returns the name of the direct confirmation measure which is used to create the vectors. 66 | * 67 | * @return 68 | */ 69 | public String getVectorCreatorName(); 70 | 71 | /** 72 | * Sets the minimum frequency of the probability estimator. 73 | * 74 | * @param minFrequency 75 | */ 76 | public void setMinFrequency(int minFrequency); 77 | } 78 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/calculations/direct/AbstractProbabilityBasedCalculationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import org.aksw.palmetto.aggregation.ArithmeticMean; 21 | import org.aksw.palmetto.data.SegmentationDefinition; 22 | import org.aksw.palmetto.data.SubsetProbabilities; 23 | import org.aksw.palmetto.subsets.Segmentator; 24 | import org.junit.Assert; 25 | import org.junit.Test; 26 | 27 | public abstract class AbstractProbabilityBasedCalculationTest { 28 | 29 | private static final double DOUBLE_PRECISION_DELTA = 0.00000001; 30 | 31 | private DirectConfirmationMeasure calculation; 32 | private Segmentator subsetCreator; 33 | private int wordsetSize; 34 | private double probabilities[]; 35 | private double expectedCoherence; 36 | 37 | public AbstractProbabilityBasedCalculationTest(DirectConfirmationMeasure calculation, Segmentator subsetCreator, 38 | int wordsetSize, 39 | double[] probabilities, double expectedCoherence) { 40 | this.calculation = calculation; 41 | this.probabilities = probabilities; 42 | this.wordsetSize = wordsetSize; 43 | this.subsetCreator = subsetCreator; 44 | this.expectedCoherence = expectedCoherence; 45 | } 46 | 47 | @Test 48 | public void test() { 49 | SegmentationDefinition subsets = 
subsetCreator.getSubsetDefinition(wordsetSize); 50 | SubsetProbabilities subProbs = new SubsetProbabilities(subsets.segments, subsets.conditions, probabilities); 51 | Assert.assertEquals( 52 | expectedCoherence, 53 | (new ArithmeticMean()).summarize(calculation.calculateConfirmationValues(subProbs)), 54 | DOUBLE_PRECISION_DELTA); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/vector/AbstractVectorCreator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.vector; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | import org.aksw.palmetto.data.SubsetProbabilities; 22 | import org.aksw.palmetto.data.SubsetVectors; 23 | import org.aksw.palmetto.prob.ProbabilityEstimator; 24 | 25 | /** 26 | * Abstract class containing the process of the vector creation. 
27 | * 28 | * @author m.roeder 29 | * 30 | */ 31 | public abstract class AbstractVectorCreator implements VectorCreator { 32 | 33 | private static final String VECTOR_SPACE_NAME = "V^Top"; 34 | 35 | private ProbabilityEstimator supplier; 36 | 37 | public AbstractVectorCreator(ProbabilityEstimator supplier) { 38 | this.supplier = supplier; 39 | } 40 | 41 | @Override 42 | public SubsetVectors[] getVectors(String[][] wordsets, SegmentationDefinition[] definitions) { 43 | SubsetProbabilities probabilities[] = supplier.getProbabilities(wordsets, definitions); 44 | return createVectors(wordsets, definitions, probabilities); 45 | } 46 | 47 | protected abstract SubsetVectors[] createVectors(String[][] wordsets, SegmentationDefinition[] definitions, 48 | SubsetProbabilities[] probabilities); 49 | 50 | @Override 51 | public String getProbabilityEstimatorName() { 52 | return supplier.getName(); 53 | } 54 | 55 | @Override 56 | public String getVectorSpaceName() { 57 | return VECTOR_SPACE_NAME; 58 | } 59 | 60 | @Override 61 | public void setMinFrequency(int minFrequency) { 62 | supplier.setMinFrequency(minFrequency); 63 | } 64 | 65 | public void setProbabilityEstimator(ProbabilityEstimator supplier) { 66 | this.supplier = supplier; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/window/WindowBasedProbabilityEstimator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob.window; 19 | 20 | import org.aksw.palmetto.data.CountedSubsets; 21 | import org.aksw.palmetto.data.SubsetProbabilities; 22 | import org.aksw.palmetto.prob.AbstractProbabilitySupplier; 23 | 24 | public class WindowBasedProbabilityEstimator extends AbstractProbabilitySupplier { 25 | 26 | private long cooccurrenceCountsSums[]; 27 | 28 | public WindowBasedProbabilityEstimator(WindowBasedFrequencyDeterminer freqDeterminer) { 29 | super(freqDeterminer); 30 | cooccurrenceCountsSums = freqDeterminer.getCooccurrenceCounts(); 31 | } 32 | 33 | @Override 34 | protected SubsetProbabilities getProbabilities(CountedSubsets countedSubsets) { 35 | double probabilities[] = new double[countedSubsets.counts.length]; 36 | int wordSet; 37 | for (int i = 1; i < probabilities.length; i = i << 1) { 38 | if (countedSubsets.counts[i] >= minFrequency) { 39 | probabilities[i] = countedSubsets.counts[i] / (double) cooccurrenceCountsSums[0]; 40 | for (int j = 1; j < i; ++j) { 41 | wordSet = i | j; 42 | if (countedSubsets.counts[wordSet] >= minFrequency) 43 | probabilities[wordSet] = countedSubsets.counts[wordSet] 44 | / (double) cooccurrenceCountsSums[Integer.bitCount(wordSet) - 1]; 45 | } 46 | } 47 | } 48 | return new SubsetProbabilities(countedSubsets.segments, countedSubsets.conditions, probabilities); 49 | } 50 | 51 | @Override 52 | public String getName() { 53 | return ((WindowBasedFrequencyDeterminer) freqDeterminer).getSlidingWindowModelName(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- 
/palmetto/src/main/java/org/aksw/palmetto/prob/decorator/AbstractSlidingWindowFrequencyDeterminerDecorator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | */ 18 | package org.aksw.palmetto.prob.decorator; 19 | 20 | import org.aksw.palmetto.prob.FrequencyDeterminer; 21 | import org.aksw.palmetto.prob.window.WindowBasedFrequencyDeterminer; 22 | 23 | public abstract class AbstractSlidingWindowFrequencyDeterminerDecorator extends AbstractFrequencyDeterminerDecorator 24 | implements SlidingWindowFrequencyDeterminerDecorator { 25 | 26 | public AbstractSlidingWindowFrequencyDeterminerDecorator(FrequencyDeterminer determiner) { 27 | super(determiner); 28 | } 29 | 30 | @Override 31 | public void setWindowSize(int windowSize) { 32 | if (determiner instanceof WindowBasedFrequencyDeterminer) { 33 | ((WindowBasedFrequencyDeterminer) determiner).setWindowSize(windowSize); 34 | } 35 | } 36 | 37 | @Override 38 | public long[] getCooccurrenceCounts() { 39 | if (determiner instanceof WindowBasedFrequencyDeterminer) { 40 | return ((WindowBasedFrequencyDeterminer) determiner).getCooccurrenceCounts(); 41 | } else { 42 | return null; 43 | } 44 | } 45 | 46 | @Override 47 | public String getSlidingWindowModelName() { 48 | if (determiner instanceof WindowBasedFrequencyDeterminer) { 49 | return ((WindowBasedFrequencyDeterminer) determiner).getSlidingWindowModelName(); 50 | } else { 51 | return null; 52 | } 53 | } 54 | 55 | @Override 56 | public int getWindowSize() { 57 | if (determiner instanceof WindowBasedFrequencyDeterminer) { 58 | return ((WindowBasedFrequencyDeterminer) determiner).getWindowSize(); 59 | } else { 60 | return 0; 61 | } 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/AbstractProbabilitySupplier.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob; 19 | 20 | import org.aksw.palmetto.data.CountedSubsets; 21 | import org.aksw.palmetto.data.SegmentationDefinition; 22 | import org.aksw.palmetto.data.SubsetProbabilities; 23 | 24 | public abstract class AbstractProbabilitySupplier implements 25 | ProbabilityEstimator { 26 | 27 | public static final int DEFAULT_MIN_FREQUENCY = 10; 28 | 29 | protected FrequencyDeterminer freqDeterminer; 30 | protected int minFrequency = DEFAULT_MIN_FREQUENCY; 31 | 32 | protected AbstractProbabilitySupplier(FrequencyDeterminer freqDeterminer) { 33 | this.freqDeterminer = freqDeterminer; 34 | } 35 | 36 | public SubsetProbabilities[] getProbabilities(String wordsets[][], 37 | SegmentationDefinition definitions[]) { 38 | CountedSubsets subsets[] = freqDeterminer.determineCounts(wordsets, 39 | definitions); 40 | SubsetProbabilities probabilities[] = new SubsetProbabilities[subsets.length]; 41 | for (int i = 0; i < subsets.length; i++) { 42 | probabilities[i] = getProbabilities(subsets[i]); 43 | } 44 | return probabilities; 45 | } 46 | 47 | protected abstract SubsetProbabilities getProbabilities( 48 | CountedSubsets countedSubsets); 49 | 50 | public void setMinFrequency(int minFrequency) { 51 
| this.minFrequency = minFrequency; 52 | } 53 | 54 | public int getMinFrequency() { 55 | return minFrequency; 56 | } 57 | 58 | public FrequencyDeterminer getFrequencyDeterminer() { 59 | return freqDeterminer; 60 | } 61 | 62 | public void setFrequencyDeterminer(FrequencyDeterminer freqDeterminer) { 63 | this.freqDeterminer = freqDeterminer; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/indirect/CentroidConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | import org.aksw.palmetto.data.SubsetVectors; 21 | 22 | import com.carrotsearch.hppc.IntObjectOpenHashMap; 23 | 24 | @Deprecated 25 | public class CentroidConfirmationMeasure extends CosinusConfirmationMeasure { 26 | 27 | @Override 28 | public String getName() { 29 | return "m_cen"; 30 | } 31 | 32 | @Override 33 | public double[] calculateConfirmationValues(SubsetVectors subsetVectors) { 34 | double values[] = new double[subsetVectors.segments.length]; 35 | 36 | double centroid[] = new double[subsetVectors.vectors[0].length]; 37 | IntObjectOpenHashMap vectorCache = new IntObjectOpenHashMap(); 38 | for (int i = 0; i < subsetVectors.vectors.length; ++i) { 39 | vectorCache.put(1 << i, subsetVectors.vectors[i]); 40 | for (int j = 0; j < centroid.length; j++) { 41 | centroid[j] += subsetVectors.vectors[i][j]; 42 | } 43 | } 44 | // for (int j = 0; j < centroid.length; j++) { 45 | // centroid[j] /= subsetVectors.vectors.length; 46 | // } 47 | double segmentVector[]; 48 | for (int i = 0; i < subsetVectors.segments.length; ++i) { 49 | if (vectorCache.containsKey(subsetVectors.segments[i])) { 50 | segmentVector = vectorCache.lget(); 51 | } else { 52 | segmentVector = createVector(subsetVectors.segments[i], subsetVectors.vectors); 53 | vectorCache.put(subsetVectors.segments[i], segmentVector); 54 | } 55 | values[i] = calculateSimilarity(segmentVector, centroid); 56 | } 57 | return values; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/calculations/indirect/AbstractVectorBasedCoherenceTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | import org.aksw.palmetto.aggregation.ArithmeticMean; 21 | import org.aksw.palmetto.data.SegmentationDefinition; 22 | import org.aksw.palmetto.data.SubsetVectors; 23 | import org.aksw.palmetto.subsets.Segmentator; 24 | import org.junit.Assert; 25 | import org.junit.Test; 26 | 27 | public abstract class AbstractVectorBasedCoherenceTest { 28 | 29 | private static final double DOUBLE_PRECISION_DELTA = 0.00000001; 30 | 31 | private VectorBasedConfirmationMeasure calculation; 32 | private Segmentator subsetCreator; 33 | private int wordsetSize; 34 | private double[] probabilities; 35 | private double vectors[][]; 36 | private double expectedCoherence; 37 | 38 | public AbstractVectorBasedCoherenceTest(VectorBasedConfirmationMeasure calculation, Segmentator subsetCreator, 39 | int wordsetSize, double[] probabilities, double[][] vectors, double expectedCoherence) { 40 | this.calculation = calculation; 41 | this.probabilities = probabilities; 42 | this.vectors = vectors; 43 | this.wordsetSize = wordsetSize; 44 | this.subsetCreator = subsetCreator; 45 | this.expectedCoherence = expectedCoherence; 46 | } 47 | 48 | @Test 49 | public void test() { 50 | 
SegmentationDefinition subsets = subsetCreator.getSubsetDefinition(wordsetSize); 51 | SubsetVectors subsetVectors = new SubsetVectors(subsets.segments, subsets.conditions, vectors, probabilities); 52 | Assert.assertEquals( 53 | expectedCoherence, 54 | (new ArithmeticMean()).summarize(calculation.calculateConfirmationValues(subsetVectors)), 55 | DOUBLE_PRECISION_DELTA); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/data/SubsetProbabilities.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.data; 19 | 20 | /** 21 | * This class contains the segmentations and probabilities for a word set. 22 | * 23 | * @author m.roeder 24 | * 25 | */ 26 | public class SubsetProbabilities { 27 | 28 | /** 29 | * Defines the W' of the subset pairs 30 | */ 31 | public int segments[]; 32 | /** 33 | * Defines for every W' in {@link #segments} an array of W* subsets. 34 | */ 35 | public int conditions[][]; 36 | /** 37 | * The marginal probabilities of the single subsets. 
38 | * 39 | * The bits of their index are the IDs of the words that are part of the 40 | * subset, e.g., 41 | *
    42 | *
  • probabilities[0] is always 0
  • 43 | *
  • probabilities[1] contains the probability for word 44 | * #1
  • 45 | *
  • probabilities[2] contains the probability for word 46 | * #2
  • 47 | *
  • probabilities[3] contains the probability for a subset 48 | * comprising word #1 and word #2
  • 49 | *
  • ...
  • 50 | *
51 | */ 52 | public double probabilities[]; 53 | 54 | public SubsetProbabilities(int[] segments, int[][] conditions, double[] segmentProbabilities) { 55 | this.segments = segments; 56 | this.conditions = conditions; 57 | this.probabilities = segmentProbabilities; 58 | } 59 | 60 | /** 61 | * @return the segmentProbabilities 62 | */ 63 | public double[] getSegmentProbabilities() { 64 | return probabilities; 65 | } 66 | 67 | /** 68 | * @param segmentProbabilities 69 | * the segmentProbabilities to set 70 | */ 71 | public void setSegmentProbabilities(double[] segmentProbabilities) { 72 | this.probabilities = segmentProbabilities; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /webApp/src/main/java/org/aksw/palmetto/webapp/config/PalmettoConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto Web Application - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.webapp.config; 19 | 20 | import org.apache.commons.configuration.CompositeConfiguration; 21 | import org.apache.commons.configuration.Configuration; 22 | import org.apache.commons.configuration.ConfigurationException; 23 | import org.apache.commons.configuration.EnvironmentConfiguration; 24 | import org.apache.commons.configuration.PropertiesConfiguration; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | /** 29 | * Contains the properties needed by Palmetto. Implements the Singleton pattern. 30 | * 31 | * @author m.roeder 32 | * 33 | */ 34 | public class PalmettoConfiguration { 35 | 36 | private static final Logger LOGGER = LoggerFactory.getLogger(PalmettoConfiguration.class); 37 | 38 | private static final String DEFAULT_PALMETTO_PROPERTIES_FILE_NAME = "palmetto.properties"; 39 | 40 | private static Configuration instance = null; 41 | 42 | public static synchronized Configuration getInstance() { 43 | if (instance == null) { 44 | CompositeConfiguration tempConfig = new CompositeConfiguration(); 45 | // Add environmental variables first 46 | tempConfig.addConfiguration(new EnvironmentConfiguration()); 47 | // Add default values for parameters to ensure that all parameters are set 48 | try { 49 | tempConfig.addConfiguration(new PropertiesConfiguration(DEFAULT_PALMETTO_PROPERTIES_FILE_NAME)); 50 | } catch (ConfigurationException e) { 51 | LOGGER.error("Couldnt load Properties from the properties file (\"" + DEFAULT_PALMETTO_PROPERTIES_FILE_NAME 52 | + "\"). This GERBIL instance won't work as expected.", e); 53 | } 54 | instance = tempConfig; 55 | } 56 | return instance; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/ProbabilityEstimator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | import org.aksw.palmetto.data.SubsetProbabilities; 22 | 23 | /** 24 | * This interface defines the methods of a class that estimates the 25 | * probabilities of a given word set. 26 | * 27 | * @author m.roeder 28 | * 29 | */ 30 | public interface ProbabilityEstimator { 31 | 32 | /** 33 | * Estimates the probabilities for the given word sets and the given 34 | * segmentations. 35 | * 36 | * @param wordsets 37 | * word sets for which the probabilities should be estimated 38 | * @param definitions 39 | * definitions how the word sets should be separated 40 | * @return the subset probabilities for the single word sets 41 | */ 42 | public SubsetProbabilities[] getProbabilities(String wordsets[][], SegmentationDefinition definitions[]); 43 | 44 | /** 45 | * Returns the frequency determiner used by this estimator. 46 | * 47 | * @return the frequency determiner used by this estimator 48 | */ 49 | public FrequencyDeterminer getFrequencyDeterminer(); 50 | 51 | /** 52 | * Sets the frequency determiner that should be used by this estimator. 
53 | * 54 | * @param determiner 55 | * the frequency determiner used by this estimator 56 | */ 57 | public void setFrequencyDeterminer(FrequencyDeterminer determiner); 58 | 59 | /** 60 | * Returns the name of this probability estimator. 61 | * 62 | * @return the name of this probability estimator 63 | */ 64 | public String getName(); 65 | 66 | /** 67 | * Sets the minimum frequency a word (or word set) must have to get a 68 | * probability > 0. 69 | * 70 | * @param minFrequency 71 | * the minimum frequency of a word 72 | */ 73 | public void setMinFrequency(int minFrequency); 74 | } 75 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/aggregation/Median.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.aggregation; 19 | 20 | import java.util.Arrays; 21 | 22 | import com.carrotsearch.hppc.DoubleArrayList; 23 | 24 | public class Median implements Aggregation { 25 | 26 | @Override 27 | public double summarize(double[] values) { 28 | if (values.length == 0) { 29 | throw new IllegalArgumentException( 30 | "The given array has to have at least one element to determine the modus."); 31 | } 32 | Arrays.sort(values); 33 | if ((values.length & 1) > 0) { 34 | return values[values.length / 2]; 35 | } else { 36 | return (values[values.length / 2] + values[(values.length / 2) - 1]) / 2; 37 | } 38 | } 39 | 40 | @Override 41 | public String getName() { 42 | return "sigma_m"; 43 | } 44 | 45 | @Override 46 | public double summarize(double[] values, double[] weights) { 47 | if (values.length == 0) { 48 | throw new IllegalArgumentException( 49 | "The given array has to have at least one element to determine the modus."); 50 | } 51 | DoubleArrayList weightedValues = new DoubleArrayList(values.length); 52 | for (int i = 0; i < values.length; ++i) { 53 | if (!Double.isNaN(values[i])) { 54 | weightedValues.add(weights[i] * values[i]); 55 | } 56 | } 57 | if (weightedValues.size() == 0) { 58 | return 0; 59 | } 60 | double weightedValuesAsArray[] = weightedValues.toArray(); 61 | Arrays.sort(weightedValuesAsArray); 62 | if ((weightedValuesAsArray.length & 1) > 0) { 63 | return weightedValuesAsArray[weightedValuesAsArray.length / 2]; 64 | } else { 65 | return (weightedValuesAsArray[weightedValuesAsArray.length / 2] + weightedValuesAsArray[(weightedValuesAsArray.length / 2) - 1]) / 2.0; 66 | } 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/subsets/OneOneAndSelf.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | import org.aksw.palmetto.vector.DirectConfirmationBasedVectorCreator; 22 | 23 | import com.carrotsearch.hppc.BitSet; 24 | 25 | /** 26 | * This is just for internal usage by the {@link DirectConfirmationBasedVectorCreator} 27 | * class. 28 | * 29 | * @author Micha 30 | * 31 | */ 32 | public class OneOneAndSelf implements Segmentator { 33 | 34 | public SegmentationDefinition getSubsetDefinition(int wordsetSize) { 35 | /* 36 | * Code the combinations of elements not with ids but with bits. 01 is 37 | * only the first element, 10 is the second and 11 is the combination of 38 | * both. 
39 | */ 40 | int conditions[][] = new int[wordsetSize][wordsetSize]; 41 | int segments[] = new int[wordsetSize]; 42 | int condBit, 43 | condPos, 44 | bit = 1, 45 | pos = 0; 46 | int mask = (1 << wordsetSize) - 1; 47 | BitSet neededCounts = new BitSet(1 << wordsetSize); 48 | while (bit < mask) { 49 | segments[pos] = bit; 50 | neededCounts.set(bit); 51 | condBit = 1; 52 | condPos = 0; 53 | if (bit == 1) { 54 | while (condBit < mask) { 55 | neededCounts.set(bit + condBit); 56 | conditions[pos][condPos] = condBit; 57 | ++condPos; 58 | condBit = condBit << 1; 59 | } 60 | } else { 61 | System.arraycopy(conditions[0], 0, conditions[pos], 0, conditions[0].length); 62 | } 63 | bit = bit << 1; 64 | ++pos; 65 | } 66 | return new SegmentationDefinition(segments, conditions, neededCounts); 67 | } 68 | 69 | @Override 70 | public String getName() { 71 | return "S^{one}_{o&s}"; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/vector/ProbabilityBasedVectorCreator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.vector; 19 | 20 | import org.aksw.palmetto.data.SegmentationDefinition; 21 | import org.aksw.palmetto.data.SubsetProbabilities; 22 | import org.aksw.palmetto.data.SubsetVectors; 23 | import org.aksw.palmetto.prob.ProbabilityEstimator; 24 | 25 | /** 26 | * This vector creator uses the marginal probabilities for the vector creation. 27 | * 28 | * @author m.roeder 29 | * 30 | */ 31 | public class ProbabilityBasedVectorCreator extends AbstractVectorCreator { 32 | 33 | private static final String VECTOR_CREATOR_NAME = "V_p"; 34 | 35 | public ProbabilityBasedVectorCreator(ProbabilityEstimator supplier) { 36 | super(supplier); 37 | } 38 | 39 | @Override 40 | public String getVectorCreatorName() { 41 | return VECTOR_CREATOR_NAME; 42 | } 43 | 44 | @Override 45 | protected SubsetVectors[] createVectors(String[][] wordsets, SegmentationDefinition[] definitions, 46 | SubsetProbabilities[] probabilities) { 47 | SubsetVectors vectors[] = new SubsetVectors[wordsets.length]; 48 | double currentVectors[][]; 49 | int bit1; 50 | for (int w = 0; w < wordsets.length; ++w) { 51 | currentVectors = new double[wordsets[w].length][wordsets[w].length]; 52 | for (int i = 0; i < wordsets[w].length; ++i) { 53 | bit1 = 1 << i; 54 | currentVectors[i][i] = probabilities[w].probabilities[bit1]; 55 | for (int j = i + 1; j < wordsets[w].length; ++j) { 56 | currentVectors[i][j] = probabilities[w].probabilities[bit1 | (1 << j)]; 57 | currentVectors[j][i] = currentVectors[i][j]; 58 | } 59 | } 60 | vectors[w] = new SubsetVectors(definitions[w].segments, definitions[w].conditions, currentVectors, 61 | probabilities[w].probabilities); 62 | } 63 | return vectors; 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/calculations/indirect/VectorCreationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is 
a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.indirect; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.junit.Assert; 24 | import org.junit.Test; 25 | import org.junit.runner.RunWith; 26 | import org.junit.runners.Parameterized; 27 | import org.junit.runners.Parameterized.Parameters; 28 | 29 | @RunWith(Parameterized.class) 30 | public class VectorCreationTest { 31 | private double vectors[][]; 32 | private int vectorId; 33 | private double expectedVector[]; 34 | 35 | private static final double DOUBLE_PRECISION_DELTA = 0.00000001; 36 | 37 | @Parameters 38 | public static Collection data() { 39 | return Arrays.asList(new Object[][] { 40 | { new double[][] { { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 1, new double[] { 1, 0, -1 } }, 41 | { new double[][] { { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 2, new double[] { 1, 2, 3 } }, 42 | { new double[][] { { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 4, new double[] { 1, 1, 1 } }, 43 | { new double[][] { { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 3, new double[] { 2, 2, 2 } }, 44 | { new double[][] { { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 5, new double[] { 2, 1, 0 } }, 45 | { new double[][] 
{ { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 6, new double[] { 2, 3, 4 } }, 46 | { new double[][] { { 1, 0, -1 }, { 1, 2, 3 }, { 1, 1, 1 } }, 7, new double[] { 3, 3, 3 } } }); 47 | } 48 | 49 | public VectorCreationTest(double[][] vectors, int vectorId, double[] expectedVector) { 50 | this.vectors = vectors; 51 | this.vectorId = vectorId; 52 | this.expectedVector = expectedVector; 53 | } 54 | 55 | @Test 56 | public void test() { 57 | AbstractVectorBasedCalculation calculation = new CosinusConfirmationMeasure(); 58 | Assert.assertArrayEquals(expectedVector, calculation.createVector(vectorId, vectors), DOUBLE_PRECISION_DELTA); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/evaluate/correlation/KendallsTauTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.evaluate.correlation; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.junit.Assert; 24 | import org.junit.Test; 25 | import org.junit.runner.RunWith; 26 | import org.junit.runners.Parameterized; 27 | import org.junit.runners.Parameterized.Parameters; 28 | 29 | @RunWith(Parameterized.class) 30 | public class KendallsTauTest { 31 | private double x[]; 32 | private double y[]; 33 | private double expectedCorrelation; 34 | private static final double DOUBLE_PRECISION_DELTA = 0.00000001; 35 | 36 | @Parameters 37 | public static Collection data() { 38 | return Arrays 39 | .asList(new Object[][] { 40 | /* 41 | * C = 19 42 | * D = 5 43 | * X = 2 44 | * Y = 2 45 | * Tau = (19 - 5) / sqrt((19 + 5 + 2) * (19 + 5 + 2)) = 14 / 26 46 | */ 47 | { new double[] { 2.0, 3.0, 3.0, 5.0, 5.5, 8.0, 10.0, 10.0 }, 48 | new double[] { 1.5, 1.5, 4.0, 3.0, 1.0, 5.0, 5.0, 9.5 }, (14.0 / 26.0) }, 49 | /* 50 | * The same as above but with a changed order 51 | */ 52 | { new double[] { 10.0, 10.0, 8.0, 5.5, 5.0, 3.0, 3.0, 2.0 }, 53 | new double[] { 9.5, 5.0, 5.0, 1.0, 3.0, 4.0, 1.5, 1.5 }, (14.0 / 26.0) } }); 54 | } 55 | 56 | public KendallsTauTest(double[] x, double[] y, double expectedCorrelation) { 57 | this.x = x; 58 | this.y = y; 59 | this.expectedCorrelation = expectedCorrelation; 60 | } 61 | 62 | @Test 63 | public void test() { 64 | KendallsTau kendallsTau = new KendallsTau(); 65 | Assert.assertEquals(expectedCorrelation, kendallsTau.calculateRankCorrelation(x, y), DOUBLE_PRECISION_DELTA); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/subsets/OneAnyTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.subsets; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.junit.Test; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.Parameterized; 26 | import org.junit.runners.Parameterized.Parameters; 27 | 28 | @RunWith(Parameterized.class) 29 | public class OneAnyTest extends AbstractSegmentatorTest { 30 | private OneAny subsetCreator; 31 | private int wordSetSize; 32 | private int expectedSegments[]; 33 | private int expectedConditions[][]; 34 | 35 | @Parameters 36 | public static Collection data() { 37 | return Arrays.asList(new Object[][] { 38 | { new OneAny(), 4, new int[] { 1, 2, 4, 8 }, new int[][] { { 2, 4, 8, 6, 10, 12, 14 }, 39 | { 1, 4, 8, 5, 9, 12, 13 }, { 1, 2, 3, 8, 9, 10, 11 }, { 1, 2, 3, 4, 5, 6, 7 } } }, 40 | { new OneAny(2, false), 4, new int[] { 1, 2, 4, 8 }, new int[][] { { 2, 4, 8, 6, 10, 12 }, 41 | { 1, 4, 8, 5, 9, 12 }, { 1, 2, 3, 8, 9, 10 }, { 1, 2, 3, 4, 5, 6 } } }, 42 | { new OneAny(1, false), 4, new int[] { 1, 2, 4, 8 }, new int[][] { { 2, 4, 8 }, 43 | { 1, 4, 8 }, { 1, 2, 8 }, { 1, 2, 4 } } }, 44 | { new OneAny(3, true), 4, new int[] { 1, 2, 4, 8 }, new int[][] { { 2, 4, 8, 6, 10, 12 }, 45 | { 1, 4, 
8, 5, 9, 12 }, { 1, 2, 3, 8, 9, 10 }, { 1, 2, 3, 4, 5, 6 } } }, 46 | { new OneAny(2, true), 4, new int[] { 1, 2, 4, 8 }, new int[][] { { 2, 4, 8 }, 47 | { 1, 4, 8 }, { 1, 2, 8 }, { 1, 2, 4 } } } }); 48 | } 49 | 50 | public OneAnyTest(OneAny subsetCreator, int wordSetSize, int[] expectedSegments, int[][] expectedConditions) { 51 | this.subsetCreator = subsetCreator; 52 | this.wordSetSize = wordSetSize; 53 | this.expectedSegments = expectedSegments; 54 | this.expectedConditions = expectedConditions; 55 | } 56 | 57 | @Test 58 | public void test() { 59 | testSubsetCreator(wordSetSize, subsetCreator, expectedSegments, expectedConditions); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/prob/decorator/SimpleFrequencyCachingDeterminerDecorator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
*/
package org.aksw.palmetto.prob.decorator;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.aksw.palmetto.data.CountedSubsets;
import org.aksw.palmetto.data.SegmentationDefinition;
import org.aksw.palmetto.prob.FrequencyDeterminer;

import com.carrotsearch.hppc.IntObjectOpenHashMap;

/**
 * This decorator implements a very simple cache. Note that it has
 * disadvantages, e.g., it will never stop growing. Thus, the
 * {@link org.aksw.palmetto.prob.decorator.FrequencyCachingDeterminerDecorator}
 * should be used instead.
 *
 * @author Michael Röder (roeder@informatik.uni-leipzig.de)
 *
 */
public class SimpleFrequencyCachingDeterminerDecorator extends AbstractSlidingWindowFrequencyDeterminerDecorator {

    /**
     * Maps a word set (as ordered list of its words) to the result that has
     * been determined for it. The words themselves are the key instead of
     * their hash code, so two different word sets that happen to have the
     * same hash code can no longer receive each other's counts.
     */
    private final Map<List<String>, CountedSubsets> cache = new HashMap<List<String>, CountedSubsets>();

    public SimpleFrequencyCachingDeterminerDecorator(FrequencyDeterminer determiner) {
        super(determiner);
    }

    /**
     * Determines the counts for the given word sets. Word sets that have been
     * seen before (same words in the same order) are answered with the cached
     * counts; all others are forwarded one by one to the decorated determiner.
     *
     * @param wordsets
     *            the word sets, read at the same index as the definitions
     * @param definitions
     *            the segmentations of the word sets
     * @return one {@link CountedSubsets} instance per definition
     */
    @Override
    public CountedSubsets[] determineCounts(String[][] wordsets, SegmentationDefinition[] definitions) {
        CountedSubsets countedSubsets[] = new CountedSubsets[definitions.length];
        String singleWordSet[][] = new String[1][];
        SegmentationDefinition singleDefinition[] = new SegmentationDefinition[1];
        for (int i = 0; i < definitions.length; ++i) {
            // Copy the words so that later changes of the caller's array do
            // not alter a key that is already stored in the map.
            List<String> cacheKey = (wordsets[i] == null) ? null
                    : new ArrayList<String>(Arrays.asList(wordsets[i]));
            CountedSubsets cached = cache.get(cacheKey);
            if (cached != null) {
                // The counts are reused, segments and conditions always come
                // from the definition of the current request.
                countedSubsets[i] = new CountedSubsets(definitions[i].segments, definitions[i].conditions,
                        cached.counts);
            } else {
                singleWordSet[0] = wordsets[i];
                singleDefinition[0] = definitions[i];
                countedSubsets[i] = this.determiner.determineCounts(singleWordSet, singleDefinition)[0];
                cache.put(cacheKey, countedSubsets[i]);
            }
        }
        return countedSubsets;
    }
}

-------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/evaluate/correlation/SpearmanTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.palmetto.evaluate.correlation;

import java.util.Arrays;
import java.util.Collection;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Checks the rank correlation calculated by {@link Spearman} against values
 * calculated by hand.
 */
@RunWith(Parameterized.class)
public class SpearmanTest {
    private double x[];
    private double y[];
    private double expectedCorrelation;
    private static final double DOUBLE_PRECISION_DELTA = 0.00000001;

    /**
     * @return the test cases as {x values, y values, expected correlation}
     */
    @Parameters
    public static Collection<Object[]> data() {
        return Arrays.asList(new Object[][] {
                /*
                 * Correlation = 1 - (6*26 / (8 * (8² - 1))) = 1 - (156/504)
                 */
                { new double[] { 2.0, 3.0, 3.0, 5.0, 5.5, 8.0, 10.0, 10.0 },
                        new double[] { 1.5, 1.5, 4.0, 3.0, 1.0, 5.0, 5.0, 9.5 }, 1.0 - (156.0 / 504.0) },
                /*
                 * The same as above but with a changed order
                 */
                { new double[] { 10.0, 10.0, 8.0, 5.5, 5.0, 3.0, 3.0, 2.0 },
                        new double[] { 9.5, 5.0, 5.0, 1.0, 3.0, 4.0, 1.5, 1.5 }, 1.0 - (156.0 / 504.0) },
                /*
                 * Correlation = 1 - (6*194 / (10 * (10² - 1))) = 1 - (1164/990)
                 */
                { new double[] { 106.0, 86.0, 100.0, 101.0, 99.0, 103.0, 97.0, 113.0, 112.0, 110.0 },
                        new double[] { 7.0, 0.0, 27.0, 50.0, 28.0, 29.0, 20.0, 12.0, 6.0, 17.0 },
                        1.0 - (1164.0 / 990.0) } });
    }

    public SpearmanTest(double[] x, double[] y, double expectedCorrelation) {
        this.x = x;
        this.y = y;
        this.expectedCorrelation = expectedCorrelation;
    }

    @Test
    public void test() {
        Spearman correlation = new Spearman();
        Assert.assertEquals(expectedCorrelation, correlation.calculateRankCorrelation(x, y), DOUBLE_PRECISION_DELTA);
    }
}

--------------------------------------------------------------------------------
/palmetto/src/main/java/org/aksw/palmetto/calculations/direct/DifferenceBasedConfirmationMeasure.java:
-------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | /** 23 | * This confirmation measure calculates the difference between the conditional 24 | * probability of W' given W* and the marginal probability of W'.
result = 25 | * P(W'|W*)-P(W') 26 | * 27 | * @author Michael Röder 28 | * 29 | */ 30 | public class DifferenceBasedConfirmationMeasure implements DirectConfirmationMeasure { 31 | 32 | @Override 33 | public double[] calculateConfirmationValues(SubsetProbabilities subsetProbabilities) { 34 | int pos = 0; 35 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 36 | pos += subsetProbabilities.conditions[i].length; 37 | } 38 | double values[] = new double[pos]; 39 | 40 | double marginalProbability, 41 | conditionalProbability; 42 | pos = 0; 43 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 44 | marginalProbability = subsetProbabilities.probabilities[subsetProbabilities.segments[i]]; 45 | if (marginalProbability > 0) { 46 | for (int j = 0; j < subsetProbabilities.conditions[i].length; ++j) { 47 | if (subsetProbabilities.probabilities[subsetProbabilities.conditions[i][j]] > 0) { 48 | conditionalProbability = subsetProbabilities.probabilities[subsetProbabilities.segments[i] 49 | | subsetProbabilities.conditions[i][j]] 50 | / subsetProbabilities.probabilities[subsetProbabilities.conditions[i][j]]; 51 | } else { 52 | conditionalProbability = 0; 53 | } 54 | values[pos] = conditionalProbability - marginalProbability; 55 | ++pos; 56 | } 57 | } else { 58 | pos += subsetProbabilities.conditions[i].length; 59 | } 60 | } 61 | return values; 62 | } 63 | 64 | @Override 65 | public String getName() { 66 | return "m_d"; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/io/SimpleWordSetReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.io; 19 | 20 | import java.io.FileReader; 21 | import java.io.IOException; 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | import java.util.Scanner; 25 | import java.util.StringTokenizer; 26 | 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | public class SimpleWordSetReader { 31 | 32 | private static final Logger LOGGER = LoggerFactory 33 | .getLogger(SimpleWordSetReader.class); 34 | 35 | public String[][] readWordSets(String inputFile) { 36 | List topics = new ArrayList(); 37 | FileReader reader = null; 38 | Scanner scanner = null; 39 | try { 40 | String[] wordset; 41 | reader = new FileReader(inputFile); 42 | scanner = new Scanner(reader); 43 | while (scanner.hasNextLine()) { 44 | wordset = parseWordSetFromLine(scanner.nextLine()); 45 | if ((wordset != null) && (wordset.length > 0)) { 46 | topics.add(wordset); 47 | } 48 | } 49 | } catch (IOException e) { 50 | LOGGER.error("Error while creating Index. 
Aborting.", e); 51 | } finally { 52 | if (scanner != null) { 53 | try { 54 | scanner.close(); 55 | } catch (Exception e) { 56 | } 57 | } else { 58 | if (reader != null) { 59 | try { 60 | reader.close(); 61 | } catch (Exception e) { 62 | } 63 | } 64 | } 65 | } 66 | return topics.toArray(new String[topics.size()][]); 67 | } 68 | 69 | private String[] parseWordSetFromLine(String line) { 70 | List topic = new ArrayList(); 71 | StringTokenizer tokenizer = new StringTokenizer(line); 72 | while ((tokenizer.hasMoreTokens())) { 73 | String nextToken = tokenizer.nextToken(); 74 | topic.add(nextToken.toLowerCase()); 75 | } 76 | return topic.toArray(new String[topic.size()]); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/direct/JaccardConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | /** 23 | * This confirmation measure calculates the Jaccard similarity between W' and 24 | * W*. result = P(W',W*)/P(W' v W*) 25 | * 26 | * @author Michael Röder 27 | * 28 | */ 29 | public class JaccardConfirmationMeasure extends AbstractUndefinedResultHandlingConfirmationMeasure { 30 | 31 | public JaccardConfirmationMeasure() { 32 | super(); 33 | } 34 | 35 | public JaccardConfirmationMeasure(double resultIfCalcUndefined) { 36 | super(resultIfCalcUndefined); 37 | } 38 | 39 | @Override 40 | public double[] calculateConfirmationValues(SubsetProbabilities subsetProbabilities) { 41 | int pos = 0; 42 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 43 | pos += subsetProbabilities.conditions[i].length; 44 | } 45 | double values[] = new double[pos]; 46 | 47 | double segmentProbability, 48 | intersectionProbability, 49 | joinProbability; 50 | pos = 0; 51 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 52 | segmentProbability = subsetProbabilities.probabilities[subsetProbabilities.segments[i]]; 53 | for (int j = 0; j < subsetProbabilities.conditions[i].length; ++j) { 54 | joinProbability = segmentProbability 55 | + subsetProbabilities.probabilities[subsetProbabilities.conditions[i][j]]; 56 | intersectionProbability = subsetProbabilities.probabilities[subsetProbabilities.segments[i] 57 | | subsetProbabilities.conditions[i][j]]; 58 | joinProbability -= intersectionProbability; 59 | if (joinProbability > 0) { 60 | values[pos] = intersectionProbability / joinProbability; 61 | } else { 62 | values[pos] = resultIfCalcUndefined; 63 | } 64 | ++pos; 65 | } 66 | } 67 | return values; 68 | } 69 | 70 | @Override 71 | public String getName() { 72 | return "m_j"; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- 
/palmetto/src/main/java/org/aksw/palmetto/corpus/lucene/SimpleAnalyzer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.corpus.lucene; 19 | 20 | import java.io.Reader; 21 | import java.util.HashMap; 22 | import java.util.Map; 23 | 24 | import org.apache.lucene.analysis.Analyzer; 25 | import org.apache.lucene.analysis.Tokenizer; 26 | import org.apache.lucene.analysis.core.LowerCaseFilterFactory; 27 | import org.apache.lucene.analysis.pattern.PatternTokenizerFactory; 28 | import org.apache.lucene.analysis.util.AbstractAnalysisFactory; 29 | import org.apache.lucene.util.Version; 30 | 31 | /** 32 | * A simple Lucene Analyzer used for the index creation. 
33 | * 34 | * @author m.roeder 35 | * 36 | */ 37 | public class SimpleAnalyzer extends Analyzer { 38 | 39 | private static final Version version = Version.LUCENE_44; 40 | private static final String PATTERN = "([^\\p{Punct}\\p{Space}]+([\\p{Punct}][^\\p{Punct}\\p{Space}]+)*)"; 41 | 42 | private PatternTokenizerFactory tokenizerFactory; 43 | private LowerCaseFilterFactory lowerCaseFilterFactory; 44 | 45 | public SimpleAnalyzer(boolean lowerCase) { 46 | Map parameters = new HashMap(); 47 | parameters.put(PatternTokenizerFactory.PATTERN, PATTERN); 48 | parameters.put(PatternTokenizerFactory.GROUP, "0"); 49 | parameters.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, 50 | version.name()); 51 | tokenizerFactory = new PatternTokenizerFactory(parameters); 52 | if (lowerCase) { 53 | parameters = new HashMap(); 54 | parameters.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, 55 | version.name()); 56 | lowerCaseFilterFactory = new LowerCaseFilterFactory(parameters); 57 | } else { 58 | lowerCaseFilterFactory = null; 59 | } 60 | } 61 | 62 | @Override 63 | protected TokenStreamComponents createComponents(String fieldName, 64 | Reader reader) { 65 | Tokenizer tokenizer = tokenizerFactory.create(reader); 66 | if (lowerCaseFilterFactory != null) { 67 | return new TokenStreamComponents(tokenizer, 68 | lowerCaseFilterFactory.create(tokenizer)); 69 | } else { 70 | return new TokenStreamComponents(tokenizer); 71 | } 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /webApp/src/test/java/org/aksw/palmetto/webapp/PerformanceTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto Web Application - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.webapp; 19 | 20 | import java.io.File; 21 | import java.io.IOException; 22 | import java.util.List; 23 | import java.util.Random; 24 | 25 | import org.aksw.palmetto.Coherence; 26 | import org.aksw.palmetto.corpus.WindowSupportingAdapter; 27 | import org.aksw.palmetto.webapp.config.RootConfig; 28 | import org.apache.commons.io.FileUtils; 29 | 30 | /** 31 | * A simple performance test used to find memory leaks. 
32 | * 33 | * @author Michael Röder (roeder@informatik.uni-leipzig.de) 34 | * 35 | */ 36 | public class PerformanceTest { 37 | 38 | private static final String VOCAB_FILE = "../Palmetto.coocmatrix/vocab.tsv"; 39 | 40 | private static final int NUMBER_OF_WORDSETS = 10000; 41 | private static final int NUMBER_OF_TERMS = 10; 42 | 43 | public static void main(String[] args) throws Exception { 44 | PerformanceTest test = new PerformanceTest(); 45 | test.run(); 46 | } 47 | 48 | public void run() throws Exception { 49 | WindowSupportingAdapter luceneAdapter = null; 50 | try { 51 | luceneAdapter = RootConfig.createLuceneAdapter(); 52 | Coherence coherence = RootConfig.createUCICoherence(luceneAdapter); 53 | 54 | Random random = new Random(System.currentTimeMillis()); 55 | String terms[] = loadTerms(); 56 | 57 | System.out.println("Waiting 20 secs..."); 58 | Thread.sleep(20000); 59 | 60 | String words[] = new String[NUMBER_OF_TERMS]; 61 | for (int i = 0; i < NUMBER_OF_WORDSETS; ++i) { 62 | if ((i % 1000) == 0) { 63 | System.out.println("Starting word set #" + i); 64 | } 65 | for (int j = 0; j < words.length; ++j) { 66 | words[j] = terms[random.nextInt(terms.length)]; 67 | } 68 | coherence.calculateCoherences(new String[][] { words }); 69 | } 70 | } finally { 71 | if (luceneAdapter != null) { 72 | luceneAdapter.close(); 73 | } 74 | } 75 | } 76 | 77 | protected String[] loadTerms() throws IOException { 78 | List lines = FileUtils.readLines(new File(VOCAB_FILE), "UTF-8"); 79 | return lines.toArray(new String[lines.size()]); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/prob/AbstractBooleanDocumentSupportingAdapterBasedTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 
3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.prob; 19 | 20 | import org.aksw.palmetto.corpus.BooleanDocumentSupportingAdapter; 21 | 22 | import com.carrotsearch.hppc.IntArrayList; 23 | import com.carrotsearch.hppc.IntOpenHashSet; 24 | import com.carrotsearch.hppc.ObjectObjectOpenHashMap; 25 | 26 | public abstract class AbstractBooleanDocumentSupportingAdapterBasedTest implements BooleanDocumentSupportingAdapter { 27 | 28 | protected int wordDocuments[][]; 29 | protected int numberOfDocuments; 30 | 31 | public AbstractBooleanDocumentSupportingAdapterBasedTest(int[][] wordDocuments, int numberOfDocuments) { 32 | this.wordDocuments = wordDocuments; 33 | this.numberOfDocuments = numberOfDocuments; 34 | } 35 | 36 | @Override 37 | public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap wordDocMapping) { 38 | Object keys[] = (Object[]) wordDocMapping.keys; 39 | Object values[] = (Object[]) wordDocMapping.values; 40 | for (int i = 0; i < wordDocMapping.allocated.length; ++i) { 41 | if (wordDocMapping.allocated[i]) { 42 | ((IntOpenHashSet) values[i]).add(wordDocuments[Integer.parseInt((String) keys[i])]); 43 | } 44 | } 45 | } 46 | 47 | @Override 48 | public void getDocumentsWithWordAsSet(String word, IntOpenHashSet 
documents) { 49 | documents.add(wordDocuments[Integer.parseInt(word)]); 50 | } 51 | 52 | @Override 53 | public void getDocumentsWithWords(ObjectObjectOpenHashMap wordDocMapping) { 54 | Object keys[] = (Object[]) wordDocMapping.keys; 55 | Object values[] = (Object[]) wordDocMapping.values; 56 | for (int i = 0; i < wordDocMapping.allocated.length; ++i) { 57 | if (wordDocMapping.allocated[i]) { 58 | ((IntArrayList) values[i]).add(wordDocuments[Integer.parseInt((String) keys[i])]); 59 | } 60 | } 61 | } 62 | 63 | @Override 64 | public void getDocumentsWithWord(String word, IntArrayList documents) { 65 | documents.add(wordDocuments[Integer.parseInt(word)]); 66 | } 67 | 68 | @Override 69 | public int getNumberOfDocuments() { 70 | return numberOfDocuments; 71 | } 72 | 73 | @Override 74 | public void close() { 75 | // nothing to do 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/calculations/direct/OlssonsCoherenceCalculationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 
17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import java.util.Arrays; 21 | import java.util.Collection; 22 | 23 | import org.aksw.palmetto.subsets.OneAll; 24 | import org.aksw.palmetto.subsets.Segmentator; 25 | import org.junit.runner.RunWith; 26 | import org.junit.runners.Parameterized; 27 | import org.junit.runners.Parameterized.Parameters; 28 | 29 | @RunWith(Parameterized.class) 30 | public class OlssonsCoherenceCalculationTest extends AbstractProbabilityBasedCalculationTest { 31 | 32 | @Parameters 33 | public static Collection data() { 34 | return Arrays.asList(new Object[][] { 35 | /* 36 | * word1 1 1 1 37 | * 38 | * word2 0 1 1 39 | * 40 | * word3 0 1 1 41 | * 42 | * C_o,oneall= P(w_1,w_2,w_3)/P(w_1 or w_2 or w_3) = 2/3 / 1 = 43 | * 2/3 44 | */ 45 | { new OneAll(), 3, 46 | new double[] { 0, 1.0, 2.0 / 3.0, 2.0 / 3.0, 2.0 / 3.0, 2.0 / 3.0, 2.0 / 3.0, 2.0 / 3.0 }, 47 | 2.0 / 3.0 }, 48 | 49 | /* 50 | * word1 0 1 1 51 | * 52 | * word2 1 0 1 53 | * 54 | * word3 1 1 0 55 | * 56 | * C_o,oneall= P(w_1,w_2,w_3)/P(w_1 or w_2 or w_3) = 0 / 1 = 0 57 | */{ new OneAll(), 3, 58 | new double[] { 0, 2.0 / 3.0, 2.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0, 0 }, 0 }, 59 | /* 60 | * word1 0 0 0 1 61 | * 62 | * word2 0 1 0 1 63 | * 64 | * word3 0 0 1 1 65 | * 66 | * C_o,oneall= P(w_1,w_2,w_3)/P(w_1 or w_2 or w_3) = 1/4 / 3/4 = 67 | * 1/3 68 | */ 69 | { new OneAll(), 3, new double[] { 0, 0.25, 0.5, 0.25, 0.5, 0.25, 0.25, 0.25 }, 1.0 / 3.0 } }); 70 | } 71 | 72 | public OlssonsCoherenceCalculationTest(Segmentator subsetCreator, int wordsetSize, double[] probabilities, 73 | double expectedCoherence) { 74 | super(new OlssonsConfirmationMeasure(), subsetCreator, wordsetSize, probabilities, expectedCoherence); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /palmetto/src/test/java/org/aksw/palmetto/evaluate/rank/RankerTest.java: 
-------------------------------------------------------------------------------- 1 | package org.aksw.palmetto.evaluate.rank; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.Collection; 6 | import java.util.List; 7 | 8 | import org.junit.Assert; 9 | import org.junit.Test; 10 | import org.junit.runner.RunWith; 11 | import org.junit.runners.Parameterized; 12 | import org.junit.runners.Parameterized.Parameters; 13 | 14 | @RunWith(Parameterized.class) 15 | public class RankerTest { 16 | 17 | private double[] values; 18 | private double[] expectedRanksAsc; 19 | private double[] expectedRanksDesc; 20 | 21 | public RankerTest(double[] values, double[] expectedRanksAsc, double[] expectedRanksDesc) { 22 | this.values = values; 23 | this.expectedRanksAsc = expectedRanksAsc; 24 | this.expectedRanksDesc = expectedRanksDesc; 25 | } 26 | 27 | @Test 28 | public void test() { 29 | // Test ascending 30 | Ranker ranker = new Ranker(); 31 | double[] localValues = Arrays.copyOf(values, values.length); 32 | double[] ranks = ranker.rank(localValues, true); 33 | try { 34 | Assert.assertArrayEquals(expectedRanksAsc, ranks, 0.00001); 35 | } catch (AssertionError e) { 36 | System.err.println("Expected " + Arrays.toString(expectedRanksAsc) + " but got " + Arrays.toString(ranks)); 37 | throw e; 38 | } 39 | // Test descending 40 | System.arraycopy(values, 0, localValues, 0, values.length); 41 | ranks = ranker.rank(localValues, false); 42 | try { 43 | Assert.assertArrayEquals(expectedRanksDesc, ranks, 0.00001); 44 | } catch (AssertionError e) { 45 | System.err.println("Expected " + Arrays.toString(expectedRanksDesc) + " but got " + Arrays.toString(ranks)); 46 | throw e; 47 | } 48 | } 49 | 50 | @Parameters 51 | public static Collection data() { 52 | List testCases = new ArrayList<>(); 53 | testCases.add(new Object[] { new double[] { 1, 2, 3 }, new double[] { 1, 2, 3 }, new double[] { 3, 2, 1 } }); 54 | testCases.add( 55 | new Object[] { new double[] { -99, 
0, 3354 }, new double[] { 1, 2, 3 }, new double[] { 3, 2, 1 } }); 56 | testCases.add( 57 | new Object[] { new double[] { 1, 2, 2 }, new double[] { 1, 2.5, 2.5 }, new double[] { 3, 1.5, 1.5 } }); 58 | testCases.add(new Object[] { new double[] { 1, 1, 1 }, new double[] { 2, 2, 2 }, new double[] { 2, 2, 2 } }); 59 | testCases.add( 60 | new Object[] { new double[] { 1, 2, Double.NaN }, new double[] { 1, 2, 3 }, new double[] { 2, 1, 3 } }); 61 | testCases.add(new Object[] { new double[] { 1, Double.NaN, Double.NaN }, new double[] { 1, 2.5, 2.5 }, 62 | new double[] { 1, 2.5, 2.5 } }); 63 | testCases.add(new Object[] { new double[] { 1, Double.NaN, Double.parseDouble("NaN"), Double.NaN, 2 }, 64 | new double[] { 1, 4, 4, 4, 2 }, new double[] { 2, 4, 4, 4, 1 } }); 65 | testCases.add(new Object[] { new double[] { Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.NaN }, 66 | new double[] { 2, 1, 3 }, new double[] { 1, 2, 3 } }); 67 | return testCases; 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /palmetto/src/main/java/org/aksw/palmetto/calculations/direct/LogJaccardConfirmationMeasure.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Palmetto - Palmetto is a quality measure tool for topics. 3 | * Copyright © 2014 Data Science Group (DICE) (michael.roeder@uni-paderborn.de) 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 
14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with this program. If not, see . 17 | */ 18 | package org.aksw.palmetto.calculations.direct; 19 | 20 | import org.aksw.palmetto.data.SubsetProbabilities; 21 | 22 | /** 23 | * This confirmation measure calculates the logarithm of the Jaccard similarity 24 | * between W' and W*. result = log((P(W',W*) + e)/P(W' v W*)) 25 | * 26 | * The e is defined by {@link LogBasedCalculation#EPSILON}. 27 | * 28 | * @author Michael Röder 29 | * 30 | */ 31 | public class LogJaccardConfirmationMeasure extends AbstractUndefinedResultHandlingConfirmationMeasure implements 32 | LogBasedCalculation { 33 | 34 | public LogJaccardConfirmationMeasure() { 35 | super(); 36 | } 37 | 38 | public LogJaccardConfirmationMeasure(double resultIfCalcUndefined) { 39 | super(resultIfCalcUndefined); 40 | } 41 | 42 | @Override 43 | public double[] calculateConfirmationValues(SubsetProbabilities subsetProbabilities) { 44 | int pos = 0; 45 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 46 | pos += subsetProbabilities.conditions[i].length; 47 | } 48 | double values[] = new double[pos]; 49 | 50 | double segmentProbability, 51 | intersectionProbability, 52 | joinProbability; 53 | pos = 0; 54 | for (int i = 0; i < subsetProbabilities.segments.length; ++i) { 55 | segmentProbability = subsetProbabilities.probabilities[subsetProbabilities.segments[i]]; 56 | for (int j = 0; j < subsetProbabilities.conditions[i].length; ++j) { 57 | joinProbability = segmentProbability 58 | + subsetProbabilities.probabilities[subsetProbabilities.conditions[i][j]]; 59 | intersectionProbability = subsetProbabilities.probabilities[subsetProbabilities.segments[i] 60 | | subsetProbabilities.conditions[i][j]]; 61 | joinProbability -= intersectionProbability; 62 | if (joinProbability > 0) { 63 | values[pos] = Math.log((intersectionProbability + LogBasedCalculation.EPSILON) / joinProbability); 64 | } else { 65 | values[pos] 
= resultIfCalcUndefined; 66 | } 67 | ++pos; 68 | } 69 | } 70 | return values; 71 | } 72 | 73 | @Override 74 | public String getName() { 75 | return "m_lj"; 76 | } 77 | } 78 | --------------------------------------------------------------------------------