├── .classpath ├── .gitignore ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── LICENSE ├── README.md ├── lib ├── Jama-1.0.3.jar ├── commons-io-2.3.jar ├── commons-lang3-3.1.jar ├── commons-logging-1.2.jar ├── guava-14.0-rc1.jar ├── httpclient-4.3.1.jar ├── httpcore-4.3.jar ├── httpmime-4.3.1.jar ├── javaml-0.1.5.jar ├── json-simple-1.1.1.jar ├── mallet-deps.jar ├── mallet.jar ├── mysql-connector-java-5.1.36-bin.jar ├── noggit-0.5.jar ├── opencsv-2.3.jar ├── slf4j-api-1.7.6.jar ├── solr-solrj-4.10.1.jar ├── vecmath-1.5.1.jar ├── wstx-asl-3.2.7.jar └── zookeeper-3.4.6.jar └── src ├── common ├── Bookmark.java ├── CalculationType.java ├── CooccurenceMatrix.java ├── DBManager.java ├── DoubleMapComparator.java ├── DoubleMapComparatorGeneric.java ├── DoubleMapComparatorKeyString.java ├── Features.java ├── IntMapComparator.java ├── MapUtil.java ├── MemoryThread.java ├── MergeUtil.java ├── PearsonSimilarityCalculator.java ├── PerformanceMeasurement.java ├── PredictionData.java ├── Similarity.java ├── SolrConnector.java ├── TimeUtil.java └── Utilities.java ├── engine ├── Algorithm.java ├── BaseLevelLearningCollectiveEngine.java ├── BaseLevelLearningEngine.java ├── EngineInterface.java ├── EngineUtils.java ├── EntityRecommenderEngine.java ├── EntityType.java ├── LanguageModelEngine.java ├── MostPopularCollectiveEngine.java ├── ResourceRecommenderEngine.java ├── TagRecommenderEvalEngine.java ├── ThreeLayersCollectiveEngine.java ├── ThreeLayersEngine.java └── UserRecommenderEngine.java ├── file ├── BookmarkReader.java ├── BookmarkSplitter.java ├── BookmarkWriter.java ├── PredictionFileReader.java ├── PredictionFileWriter.java ├── ResultSerializer.java ├── postprocessing │ └── CatDescFiltering.java ├── preprocessing │ ├── BibBookmark.java │ ├── BibsonomyProcessor.java │ ├── CiteULikeProcessor.java │ ├── CoreFiltering.java │ ├── JKULFMProcessor.java │ ├── JSONProcessor.java │ ├── LastFMProcessor.java │ ├── MovielensProcessor.java │ ├── PintsProcessor.java │ └── 
TensorProcessor.java └── stemming │ ├── Among.java │ ├── EnglishStemmer.java │ ├── SnowballProgram.java │ └── SnowballStemmer.java ├── itemrecommendations ├── CFResourceCalculator.java ├── CIRTTCalculator.java ├── HuangApproach.java ├── HuangCalculator.java ├── MPResourceCalculator.java ├── SustainCalculator.java ├── ZhengApproach.java └── ZhengCalculator.java ├── processing ├── BLLCalculator.java ├── CFTagRecommender.java ├── ContentBasedCalculator.java ├── FolkRankCalculator.java ├── GIRPTMCalculator.java ├── MPCalculator.java ├── MPurCalculator.java ├── MalletCalculator.java ├── MetricsCalculator.java ├── RecencyCalculator.java ├── ThreeLTCalculator.java ├── analyzing │ ├── ReuseProbValue.java │ ├── TagReuseProbAnalyzer.java │ ├── UserTagDistribution.java │ └── UserTagProperties.java ├── folkrank │ ├── APRFolkRankResult.java │ ├── FactPreprocessor.java │ ├── FactReader.java │ ├── FactReaderFactPreprocessor.java │ ├── FactReadingException.java │ ├── FolkRankAlgorithm.java │ ├── FolkRankData.java │ ├── FolkRankParam.java │ ├── FolkRankPref.java │ ├── FolkRankResult.java │ ├── ItemWithWeight.java │ ├── LeavePostOutFolkRankDataDuplicator.java │ ├── SeparatedWeightInitializationStrategy.java │ ├── StandardFolkRankResult.java │ ├── WeightInitializationStrategy.java │ └── WikipediaFactReader.java ├── hashtag │ ├── HashtagRecommendationEngine.java │ ├── TagRecommendationUtil.java │ ├── analysis │ │ ├── ProcessFrequencyRecency.java │ │ └── ProcessFrequencyRecencySocial.java │ ├── baseline │ │ ├── ContentPersonalTemporalCalculator.java │ │ ├── CosineSimilarityCalculator.java │ │ ├── HashtagEntropyCalculator.java │ │ ├── HashtagEntropyCalculatorTest.java │ │ ├── HashtagUtil.java │ │ ├── MalletCalculatorTweet.java │ │ ├── PersonalisedSimilarityCalculator.java │ │ ├── PersonalisedSimilarityCalculatorTest.java │ │ ├── PersonalisedTFIDFCalculator.java │ │ ├── UserTFIDFVectorCalculator.java │ │ ├── Vector.java │ │ ├── VectorUtil.java │ │ └── VectorUtilTest.java │ ├── social │ │ 
├── RelationItem.java │ │ ├── SocialBLLCalculator.java │ │ ├── SocialFrequencyCalculator.java │ │ ├── SocialHybridCalculator.java │ │ ├── SocialInitEngine.java │ │ ├── SocialLinkWeightCalculator.java │ │ ├── SocialRecencyRecommender.java │ │ └── SocialStrengthCalculator.java │ └── solr │ │ ├── CFSolrHashtagCalculator.java │ │ ├── SolrHashtagCalculator.java │ │ └── Tweet.java └── musicrec │ ├── ArtistCFRecommender.java │ ├── BLL_CFRecommender.java │ └── MusicCFRecommender.java ├── test └── Pipeline.java └── userrecommender ├── SpearAlgorithm.java ├── SpearAlgorithmResult.java ├── SpearCalculator.java ├── TestSpearCalculator.java └── UserActivity.java /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # idea intellij 2 | .idea 3 | *.iml 4 | /bin/ 5 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | TagRecommender 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.8 7 | 
org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.source=1.8 13 | -------------------------------------------------------------------------------- /lib/Jama-1.0.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/Jama-1.0.3.jar -------------------------------------------------------------------------------- /lib/commons-io-2.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/commons-io-2.3.jar -------------------------------------------------------------------------------- /lib/commons-lang3-3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/commons-lang3-3.1.jar -------------------------------------------------------------------------------- /lib/commons-logging-1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/commons-logging-1.2.jar -------------------------------------------------------------------------------- /lib/guava-14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/guava-14.0-rc1.jar -------------------------------------------------------------------------------- 
/lib/httpclient-4.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/httpclient-4.3.1.jar -------------------------------------------------------------------------------- /lib/httpcore-4.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/httpcore-4.3.jar -------------------------------------------------------------------------------- /lib/httpmime-4.3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/httpmime-4.3.1.jar -------------------------------------------------------------------------------- /lib/javaml-0.1.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/javaml-0.1.5.jar -------------------------------------------------------------------------------- /lib/json-simple-1.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/json-simple-1.1.1.jar -------------------------------------------------------------------------------- /lib/mallet-deps.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/mallet-deps.jar -------------------------------------------------------------------------------- /lib/mallet.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/mallet.jar -------------------------------------------------------------------------------- /lib/mysql-connector-java-5.1.36-bin.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/mysql-connector-java-5.1.36-bin.jar -------------------------------------------------------------------------------- /lib/noggit-0.5.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/noggit-0.5.jar -------------------------------------------------------------------------------- /lib/opencsv-2.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/opencsv-2.3.jar -------------------------------------------------------------------------------- /lib/slf4j-api-1.7.6.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/slf4j-api-1.7.6.jar -------------------------------------------------------------------------------- /lib/solr-solrj-4.10.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/solr-solrj-4.10.1.jar -------------------------------------------------------------------------------- /lib/vecmath-1.5.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/vecmath-1.5.1.jar 
-------------------------------------------------------------------------------- /lib/wstx-asl-3.2.7.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/wstx-asl-3.2.7.jar -------------------------------------------------------------------------------- /lib/zookeeper-3.4.6.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learning-layers/TagRec/811793ff097b936d9e6f8fa31cba604281926690/lib/zookeeper-3.4.6.jar -------------------------------------------------------------------------------- /src/common/CalculationType.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 5 | Copyright (C) 2013 Dominik Kowald 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU Affero General Public License as 9 | published by the Free Software Foundation, either version 3 of the 10 | License, or (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Affero General Public License for more details. 16 | 17 | You should have received a copy of the GNU Affero General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | package common; 22 | 23 | public enum CalculationType { 24 | NONE, // BLL 25 | USER_TO_RESOURCE, // BLLac 26 | USER_TO_RESOURCE_ONLY, // AC 27 | RESOURCE_TO_USER, 28 | RESOURCE_TO_USER_ONLY, 29 | BOTH, 30 | MUSIC 31 | } -------------------------------------------------------------------------------- /src/common/DBManager.java: -------------------------------------------------------------------------------- 1 | package common; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DriverManager; 5 | import java.sql.ResultSet; 6 | import java.sql.SQLException; 7 | import java.sql.Statement; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import file.preprocessing.BibBookmark; 12 | 13 | public class DBManager { 14 | private String dbName; 15 | private Connection connect = null; 16 | private Statement statement = null; 17 | private ResultSet resultSet = null; 18 | 19 | public DBManager(String dbname) { 20 | try { 21 | Class.forName("com.mysql.jdbc.Driver"); 22 | // Setup the connection with the DB 23 | dbName = dbname; 24 | connect = DriverManager.getConnection("jdbc:mysql://localhost/" 25 | + dbname + "?user=root"); 26 | } catch (Exception e) { 27 | e.printStackTrace(); 28 | } 29 | } 30 | 31 | public List getBibBookmarks(String tableName, String idFieldName, String urlFieldName) { 32 | List bookmarks = new ArrayList(); 33 | 34 | try { 35 | statement = connect.createStatement(); 36 | resultSet = statement.executeQuery("select * from " + dbName + "." 
+ tableName); 37 | while (resultSet.next()) { 38 | BibBookmark b = new BibBookmark(); 39 | b.id = resultSet.getString(idFieldName); 40 | b.urlHash = resultSet.getString(urlFieldName); 41 | bookmarks.add(b); 42 | } 43 | } catch (SQLException e) { 44 | e.printStackTrace(); 45 | } 46 | 47 | return bookmarks; 48 | } 49 | 50 | public void close() { 51 | try { 52 | if (resultSet != null) { 53 | resultSet.close(); 54 | } 55 | if (statement != null) { 56 | statement.close(); 57 | } 58 | if (connect != null) { 59 | connect.close(); 60 | } 61 | } catch (Exception e) { 62 | 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/common/DoubleMapComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 5 | Copyright (C) 2013 Dominik Kowald 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU Affero General Public License as 9 | published by the Free Software Foundation, either version 3 of the 10 | License, or (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Affero General Public License for more details. 16 | 17 | You should have received a copy of the GNU Affero General Public License 18 | along with this program. If not, see . 
/**
 * Orders {@code Integer} keys by the {@code Double} value a backing map assigns to them,
 * largest value first.
 *
 * <p>Keys whose values are equal are ordered by ascending key. This keeps the ordering a
 * valid total order: {@code compare(k, k)} is 0 and {@code compare(a, b)} is the mirror of
 * {@code compare(b, a)}, while two distinct keys with equal values still never compare as
 * equal (so a sorted map using this comparator keeps both of them).
 */
public class DoubleMapComparator implements Comparator<Integer> {

    private Map<Integer, Double> map;

    /**
     * @param map lookup table from key to the value that determines the ordering
     */
    public DoubleMapComparator(Map<Integer, Double> map) {
        this.map = map;
    }

    /**
     * Compares two keys by their mapped values in descending order.
     *
     * @return a negative number if {@code key1} sorts first, a positive number if
     *         {@code key2} sorts first, and 0 if the keys are equal or if either key has
     *         no value in the map (unchanged from the previous behaviour)
     */
    @Override
    public int compare(Integer key1, Integer key2) {
        Double val1 = this.map.get(key1);
        Double val2 = this.map.get(key2);
        if (val1 == null || val2 == null) {
            return 0;
        }
        // Arguments are swapped to obtain descending order by value.
        int byValue = Double.compare(val2, val1);
        if (byValue != 0) {
            return byValue;
        }
        // Tie-break on the key instead of always answering -1.
        return key1.compareTo(key2);
    }
}
/**
 * Orders {@code String} keys by the {@code Double} value a backing map assigns to them,
 * largest value first.
 *
 * <p>Keys whose values are equal are ordered by their natural string order. This keeps the
 * ordering a valid total order: {@code compare(k, k)} is 0 and {@code compare(a, b)} is the
 * mirror of {@code compare(b, a)}, while two distinct keys with equal values still never
 * compare as equal (so a sorted map using this comparator keeps both of them).
 */
public class DoubleMapComparatorKeyString implements Comparator<String> {

    private Map<String, Double> map;

    /**
     * @param map lookup table from key to the value that determines the ordering
     */
    public DoubleMapComparatorKeyString(Map<String, Double> map) {
        this.map = map;
    }

    /**
     * Compares two keys by their mapped values in descending order.
     *
     * @return a negative number if {@code key1} sorts first, a positive number if
     *         {@code key2} sorts first, and 0 if the keys are equal or if either key has
     *         no value in the map (unchanged from the previous behaviour)
     */
    @Override
    public int compare(String key1, String key2) {
        Double val1 = this.map.get(key1);
        Double val2 = this.map.get(key2);
        if (val1 == null || val2 == null) {
            return 0;
        }
        // Arguments are swapped to obtain descending order by value.
        int byValue = Double.compare(val2, val1);
        if (byValue != 0) {
            return byValue;
        }
        // Tie-break on the key instead of always answering -1.
        return key1.compareTo(key2);
    }
}
/**
 * Feature kinds selectable in the framework.
 *
 * <p>The declaration order is kept exactly as before so that ordinals do not change.
 */
public enum Features {

    TAGS, TOPICS, ENTITIES,

    // Variants marked "Zheng" in the original source
    TAG_ENTITIES, TIME_ENTITIES, TAG_TIME_ENTITIES
}
/**
 * Static helpers for sorting and normalising maps.
 */
public class MapUtil
{
    /**
     * Returns a copy of {@code map} whose iteration order is by value, largest first.
     * Entries with equal values keep the relative order they had in {@code map}
     * (the sort is stable). The input map is not modified.
     *
     * @param map the map to sort
     * @return a new insertion-ordered map sorted by descending value
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue( Map<K, V> map )
    {
        List<Map.Entry<K, V>> entries = new ArrayList<Map.Entry<K, V>>( map.entrySet() );
        Collections.sort( entries, new Comparator<Map.Entry<K, V>>()
        {
            @Override
            public int compare( Map.Entry<K, V> o1, Map.Entry<K, V> o2 )
            {
                // Swapped operands give descending order without negating the result
                // (negation is wrong if compareTo ever returns Integer.MIN_VALUE).
                return o2.getValue().compareTo( o1.getValue() );
            }
        } );

        Map<K, V> result = new LinkedHashMap<K, V>();
        for (Map.Entry<K, V> entry : entries)
        {
            result.put( entry.getKey(), entry.getValue() );
        }
        return result;
    }

    /**
     * Replaces every value {@code v} in place by {@code exp(v) / sum(exp(all values))},
     * so that the values sum to 1.
     *
     * @param map the map whose values are normalised in place
     */
    public static <K> void normalizeMap(Map<K, Double> map) {
        // Multiplying by 1.0 is exact, so this yields the same values as before.
        normalizeMap(map, 1.0);
    }

    /**
     * Replaces every value {@code v} in place by {@code exp(v) / sum(exp(all values)) * beta},
     * so that the values sum to {@code beta}.
     *
     * <p>The previous implementation added both {@code exp(v)} and {@code v} to the
     * denominator and rescaled each entry twice, so the result was neither this form nor a
     * linear normalisation. NOTE(review): the exponential form was chosen to agree with
     * {@link #normalizeMap(Map)}; confirm that a linear {@code v / sum(v) * beta} was not
     * the intended variant.
     *
     * @param map  the map whose values are normalised in place
     * @param beta weight applied to every normalised value
     */
    public static <K> void normalizeMap(Map<K, Double> map, double beta) {
        double denom = 0.0;
        for (Double value : map.values()) {
            denom += Math.exp(value);
        }
        for (Map.Entry<K, Double> e : map.entrySet()) {
            e.setValue(Math.exp(e.getValue()) / denom * beta);
        }
    }
}
Runtime.getRuntime(); 18 | // Run the garbage collector 19 | //runtime.gc(); 20 | // Calculate the used memory 21 | long memory = runtime.totalMemory() - runtime.freeMemory(); 22 | if (memory > this.maxMemory) { 23 | this.maxMemory = memory; 24 | } 25 | } 26 | 27 | public void run() { 28 | trackMaxMemory(); 29 | } 30 | 31 | public long getMaxMemory() { 32 | trackMaxMemory(); 33 | return this.maxMemory; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/common/MergeUtil.java: -------------------------------------------------------------------------------- 1 | package common; 2 | 3 | import java.util.LinkedHashMap; 4 | import java.util.Map; 5 | import java.util.TreeMap; 6 | 7 | public class MergeUtil { 8 | 9 | public static Map mergeMapsWithThreshold(Map srcMap, Map targetMap, int limit) { 10 | Map resultMap = new LinkedHashMap(); 11 | Map sortedTargetMap = MapUtil.sortByValue(targetMap); 12 | double threshold = 0.0; 13 | for (Map.Entry entry : sortedTargetMap.entrySet()) { 14 | threshold = entry.getValue(); 15 | break; 16 | } 17 | System.out.println(threshold); 18 | 19 | for (Map.Entry srcEntry : srcMap.entrySet()) { 20 | if (srcEntry.getValue() >= threshold) { 21 | resultMap.put(srcEntry.getKey(), srcEntry.getValue()); 22 | } else { 23 | break; 24 | } 25 | } 26 | for (Map.Entry targetEntry: sortedTargetMap.entrySet()) { 27 | if (resultMap.size() < limit) { 28 | if (!resultMap.containsKey(targetEntry.getKey())) { 29 | resultMap.put(targetEntry.getKey(), targetEntry.getValue()); 30 | } 31 | } else { 32 | break; 33 | } 34 | } 35 | 36 | return resultMap; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/common/PearsonSimilarityCalculator.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 
Copyright (C) 2013 Dominik Kowald

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package common;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Calculator for the Pearson similarity between the ratings of two users.
 *
 * @author elacic
 */
public class PearsonSimilarityCalculator {

    /**
     * Calculates the Pearson similarity between a user and a neighbor.
     *
     * <p>Each average is taken over all ratings of the respective user, while the
     * covariance and variance sums only run over resources that both users rated.
     * If a list contains several bookmarks for the same resource, the first one wins.
     *
     * @param userRatings resource ratings of the user
     * @param neighborRatings resource ratings of the user's neighbor
     * @return the Pearson similarity, or {@code 0.0} when it is undefined (no co-rated
     *         resources, or one of the users rated all co-rated resources at their average)
     */
    public static double getPearsonSim(List<Bookmark> userRatings, List<Bookmark> neighborRatings) {
        // The two users are kept in separate maps. Collecting both into one list per
        // resource made a resource that a single user bookmarked twice look co-rated.
        Map<Integer, Double> userRatingByResource = new HashMap<Integer, Double>();
        Map<Integer, Double> neighborRatingByResource = new HashMap<Integer, Double>();

        double userRatingAverage = calculateAverageAndFillRatings(userRatings, userRatingByResource);
        double neighborRatingAverage = calculateAverageAndFillRatings(neighborRatings, neighborRatingByResource);

        double differenceSum = 0.0;
        double userSquareDifferenceSum = 0.0;
        double neighborSquareDifferenceSum = 0.0;

        for (Map.Entry<Integer, Double> userEntry : userRatingByResource.entrySet()) {
            Double neighborResourceRating = neighborRatingByResource.get(userEntry.getKey());
            if (neighborResourceRating == null) {
                continue; // only the user rated this resource
            }

            double userDifference = userEntry.getValue() - userRatingAverage;
            double neighborDifference = neighborResourceRating - neighborRatingAverage;

            differenceSum += userDifference * neighborDifference;
            userSquareDifferenceSum += userDifference * userDifference;
            neighborSquareDifferenceSum += neighborDifference * neighborDifference;
        }

        double denominator = Math.sqrt(userSquareDifferenceSum) * Math.sqrt(neighborSquareDifferenceSum);
        if (denominator == 0.0) {
            // 0/0 would yield NaN, which poisons any later sorting or summation.
            return 0.0;
        }
        return differenceSum / denominator;
    }

    /**
     * Calculates the average of a user's (filtered) ratings and records the rating per resource.
     *
     * @param ratings bookmarks of one user
     * @param ratingByResource map that receives the first filtered rating seen for each resource
     * @return the average over all filtered ratings; NaN for an empty list
     */
    private static double calculateAverageAndFillRatings(List<Bookmark> ratings, Map<Integer, Double> ratingByResource) {
        double ratingSum = 0.0;
        for (Bookmark rating : ratings) {
            double filteredRating = filterRating(rating.getRating());
            ratingSum += filteredRating;

            Integer resourceId = rating.getResourceID();
            if (!ratingByResource.containsKey(resourceId)) {
                ratingByResource.put(resourceId, filteredRating);
            }
        }
        return ratingSum / ratings.size();
    }

    /**
     * Replaces the marker for read-only ratings, which are set to -1 by convention
     * in the CUL dataset, with the value 3.
     *
     * @param rating rating to be filtered
     * @return filtered rating
     */
    private static double filterRating(double rating) {
        if (rating == -1) {
            return 3;
        }
        return rating;
    }
}

--------------------------------------------------------------------------------
/src/common/PerformanceMeasurement.java:
--------------------------------------------------------------------------------
package common;

/**
 * Helpers that append human-readable time and memory measurements to a report string.
 */
public class PerformanceMeasurement {

    private static final long MEGABYTE = 1024L * 1024L;

    /**
     * Appends training/test timing lines to {@code performance}.
     *
     * @param performance report text so far
     * @param clearString if {@code true}, the previous report text is discarded
     * @param trainingTime full training time, printed as given
     * @param testTime full test time, printed as given
     * @param sampleSize number of samples the test time is averaged over
     * @return the extended report
     */
    public static String addTimeMeasurement(String performance, boolean clearString, long trainingTime, long testTime, int sampleSize) {
        StringBuilder report = startReport(performance, clearString);
        report.append("Full training time: ").append(trainingTime).append("\n");
        report.append("Full test time: ").append(testTime).append("\n");
        report.append("Average test time: ").append(testTime / (double) sampleSize).append("\n");
        report.append("Total time: ").append(trainingTime + testTime).append("\n");
        return report.toString();
    }

    /**
     * Appends the given memory amount, in bytes and in whole megabytes.
     *
     * @param performance report text so far
     * @param clearString if {@code true}, the previous report text is discarded
     * @param memory memory amount in bytes
     * @return the extended report
     */
    public static String addMemoryMeasurement(String performance, boolean clearString, long memory) {
        StringBuilder report = startReport(performance, clearString);
        report.append("Memory in bytes: ").append(memory).append("\n");
        report.append("Memory in mBytes: ").append(bytesToMegabytes(memory)).append("\n");
        return report.toString();
    }

    /**
     * Appends the memory currently used by the JVM (total minus free).
     *
     * @param performance report text so far
     * @param clearString if {@code true}, the previous report text is discarded
     * @param gc if {@code true}, a garbage collection is requested before measuring
     * @return the extended report
     */
    public static String addCurrentMemoryMeasurement(String performance, boolean clearString, boolean gc) {
        Runtime runtime = Runtime.getRuntime();
        if (gc) {
            runtime.gc();
        }
        long usedMemory = runtime.totalMemory() - runtime.freeMemory();
        return addMemoryMeasurement(performance, clearString, usedMemory);
    }

    /** Starts a report; a {@code null} text is rendered as "null", exactly as string concatenation would. */
    private static StringBuilder startReport(String performance, boolean clearString) {
        return new StringBuilder(clearString ? "" : String.valueOf(performance));
    }

    private static long bytesToMegabytes(long bytes) {
        return bytes / MEGABYTE;
    }
}

--------------------------------------------------------------------------------
/src/common/Similarity.java:
--------------------------------------------------------------------------------
/*
TagRecommender:
A framework to implement and evaluate algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package common;

public enum Similarity {

    JACCARD,
    COSINE,
    BM25
}

--------------------------------------------------------------------------------
/src/common/TimeUtil.java:
--------------------------------------------------------------------------------
package common;

/**
 * Converts durations given in seconds into coarser time units.
 */
public class TimeUtil {

    public static final int SECOND = 0;
    public static final int MINUTE = 1;
    public static final int HOUR = 2;
    public static final int DAY = 3;
    public static final int FIFTEEN_DAYS = 4;
    public static final int MONTH = 5;
    public static final int WEEK = 6;

    private static final int SECONDS_IN_MINUTE = 60;
    private static final int SECONDS_IN_HOUR = 60 * 60;
    private static final int SECONDS_IN_DAY = 24 * 60 * 60;
    private static final int SECONDS_IN_WEEK = 7 * 24 * 60 * 60;
    private static final int SECONDS_IN_FIFTEEN_DAYS = 15 * 24 * 60 * 60;
    // A month is approximated as 30 days.
    private static final int SECONDS_IN_MONTH = 30 * 24 * 60 * 60;

    /**
     * Gets the number of whole units of the given granularity contained in a duration.
     *
     * @param duration duration in seconds
     * @param granularityLevel one of the granularity constants of this class
     * @return the duration in whole units (integer division), or 0 for an unknown granularity
     */
    public static int getDurationAtGranularity(int duration, int granularityLevel) {
        switch (granularityLevel) {
            case SECOND:
                return duration;
            case MINUTE:
                return duration / SECONDS_IN_MINUTE;
            case HOUR:
                return duration / SECONDS_IN_HOUR;
            case DAY:
                return duration / SECONDS_IN_DAY;
            case FIFTEEN_DAYS:
                return duration / SECONDS_IN_FIFTEEN_DAYS;
            case MONTH:
                return duration / SECONDS_IN_MONTH;
            case WEEK:
                return duration / SECONDS_IN_WEEK;
            default:
                // Unknown granularity levels have always produced 0; callers may rely on it.
                return 0;
        }
    }

}

--------------------------------------------------------------------------------
/src/engine/Algorithm.java:
--------------------------------------------------------------------------------
package engine;

public enum Algorithm {
    MP,
    BLL,
    BLLac,
    BLLacMPr,
    BLLcoll,
    MPu,
    MPr,
    MPur,
    THREEL,
    THREELT,
    THREELTMPr,
    THREELcoll,

    RESOURCEMP,
    RESOURCECF,
    RESOURCETAGCF,
    RESOURCETAGCB,

    USERMP,
    USERCF,
    USERTAGCF,
    USERTAGCB
}

--------------------------------------------------------------------------------
/src/engine/BaseLevelLearningCollectiveEngine.java:
--------------------------------------------------------------------------------
package engine;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import
processing.BLLCalculator; 9 | import file.BookmarkReader; 10 | 11 | public class BaseLevelLearningCollectiveEngine implements EngineInterface { 12 | 13 | private BookmarkReader reader = null; 14 | private final Map collectiveTags = new LinkedHashMap(); 15 | 16 | public void loadFile(String path, String filename) throws Exception { 17 | BookmarkReader reader = EngineUtils.getSortedBookmarkReader(path, filename); 18 | Map collectiveTags = BLLCalculator.getCollectiveArtifactMap(reader, reader.getBookmarks(), null, false, new ArrayList(), new ArrayList(), 0.5, true); 19 | 20 | // map to strings 21 | Map collectiveTagNames = new LinkedHashMap(); 22 | for (Map.Entry tag : collectiveTags.entrySet()) { 23 | collectiveTagNames.put(reader.getTags().get(tag.getKey()), tag.getValue()); 24 | } 25 | 26 | resetStructures(reader, collectiveTagNames); 27 | } 28 | 29 | @Override 30 | public synchronized Map getEntitiesWithLikelihood(String user, String resource, List topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) { 31 | if (count == null || count.doubleValue() < 1) { 32 | count = 10; 33 | } 34 | Map tagMap = new LinkedHashMap(); 35 | if (algorithm == null || algorithm == Algorithm.BLLcoll) { 36 | tagMap = this.collectiveTags; 37 | } else { 38 | int userID = this.reader.getUsers().indexOf(user); 39 | if (user != null && userID != -1) { 40 | Map userTags = BLLCalculator.getSortedArtifactMapForUser(userID, this.reader, this.reader.getBookmarks(), null, false, new ArrayList(), new ArrayList(), 0.5, true); 41 | for (Map.Entry tag : userTags.entrySet()) { 42 | tagMap.put(this.reader.getTags().get(tag.getKey()), tag.getValue()); 43 | } 44 | } 45 | } 46 | 47 | Map returnMap = new LinkedHashMap(); 48 | for (Map.Entry entry : tagMap.entrySet()) { 49 | if (returnMap.size() < count.intValue()) { 50 | returnMap.put(entry.getKey(), entry.getValue()); 51 | } else { 52 | break; 53 | } 54 | } 55 | return returnMap; 56 | } 57 | 58 | private synchronized void 
resetStructures(BookmarkReader reader, Map collectiveTags) { 59 | this.reader = reader; 60 | this.collectiveTags.clear(); 61 | this.collectiveTags.putAll(collectiveTags); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/engine/EngineInterface.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 5 | Copyright (C) 2013 Dominik Kowald 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU Affero General Public License as 9 | published by the Free Software Foundation, either version 3 of the 10 | License, or (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Affero General Public License for more details. 16 | 17 | You should have received a copy of the GNU Affero General Public License 18 | along with this program. If not, see . 19 | */ 20 | 21 | package engine; 22 | 23 | import java.util.List; 24 | import java.util.Map; 25 | 26 | public interface EngineInterface { 27 | 28 | public void loadFile(String path, String filename) throws Exception; 29 | 30 | public Map getEntitiesWithLikelihood(String user, String resource, List topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type); 31 | } 32 | -------------------------------------------------------------------------------- /src/engine/EngineUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 
Copyright (C) 2013 Dominik Kowald

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package engine;

import java.io.File;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import common.Bookmark;
import common.IntMapComparator;
import file.BookmarkReader;
import file.BookmarkSplitter;
import file.BookmarkWriter;

/**
 * Static helpers shared by the recommender engines.
 */
public class EngineUtils {

    /**
     * Reads a bookmark file, sorts its bookmarks, writes them to a temporary
     * "{@code <filename>_sorted}" file and reads that file back, so that the returned
     * reader is built from sorted input. The temporary file is deleted afterwards on a
     * best-effort basis.
     *
     * @param path directory of the file, or {@code null} for the default data directory
     * @param filename name of the bookmark file
     * @return a reader that was filled from the sorted file
     */
    public static BookmarkReader getSortedBookmarkReader(String path, String filename) {
        BookmarkReader reader = new BookmarkReader(0, false);
        reader.readFile(path, filename);

        Collections.sort(reader.getBookmarks());

        String sortedFile = filename + "_sorted";
        BookmarkWriter.writeSample(reader, reader.getBookmarks(), path, sortedFile, null, true);
        reader = new BookmarkReader(0, false);
        reader.readFile(path, sortedFile);

        try {
            // NOTE(review): ".txt" is only appended when path is null - confirm that
            // BookmarkWriter.writeSample really omits the extension for explicit paths,
            // otherwise the temp file is never deleted in that case.
            String deletePath;
            if (path == null) {
                deletePath = "./data/csv/" + sortedFile + ".txt";
            } else {
                deletePath = path + sortedFile;
            }
            if (!new File(deletePath).delete()) {
                System.out.println("Problem while deleting sorted temp-file");
            }
        } catch (Exception e) {
            // Failing to clean up must not fail the load itself.
            System.out.println("Problem while deleting sorted temp-file");
        }

        return reader;
    }

    /**
     * Returns the IDs of the tags the given user already assigned to the given resource.
     *
     * @param filterOwnEntities if {@code false}, nothing is filtered
     * @param reader reader that holds the bookmarks
     * @param user user name, may be {@code null}
     * @param resource resource name, may be {@code null}
     * @return the tag IDs to filter; an empty list when filtering is off or user/resource is missing
     */
    public static List<Integer> getFilterTags(boolean filterOwnEntities, BookmarkReader reader, String user, String resource) {
        if (!filterOwnEntities || user == null || resource == null) {
            // Without a resource nothing is filtered; filtering by all tags of the
            // user was once sketched here but never enabled.
            return new ArrayList<Integer>();
        }
        int userID = reader.getUsers().indexOf(user);
        int resID = reader.getResources().indexOf(resource);
        return Bookmark.getTagsOfBookmark(reader.getBookmarks(), userID, resID);
    }

    /**
     * Calculates the relative frequency of every tag, resource or user.
     *
     * @param reader reader that holds the counts
     * @param type which entity counts to use; anything but TAG and RESOURCE selects users
     * @return entity IDs mapped to count / total count, in the order defined by {@link IntMapComparator}
     */
    public static Map<Integer, Double> calcTopEntities(BookmarkReader reader, EntityType type) {
        List<Integer> entityCounts;
        if (type == EntityType.TAG) {
            entityCounts = reader.getTagCounts();
        } else if (type == EntityType.RESOURCE) {
            entityCounts = reader.getResourceCounts();
        } else {
            entityCounts = reader.getUserCounts();
        }

        Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>();
        int countSum = 0; // primitive: avoids re-boxing on every addition
        for (int i = 0; i < entityCounts.size(); i++) {
            int entityCount = entityCounts.get(i);
            countMap.put(i, entityCount);
            countSum += entityCount;
        }

        Map<Integer, Integer> sortedCountMap = new TreeMap<Integer, Integer>(new IntMapComparator(countMap));
        sortedCountMap.putAll(countMap);

        Map<Integer, Double> map = new LinkedHashMap<>();
        for (Map.Entry<Integer, Integer> entry : sortedCountMap.entrySet()) {
            // With a total of zero every count is zero as well; report 0 instead of NaN.
            double share = (countSum == 0) ? 0.0 : ((double) entry.getValue()) / countSum;
            map.put(entry.getKey(), share);
        }
        return map;
    }
}

--------------------------------------------------------------------------------
/src/engine/EntityRecommenderEngine.java:
--------------------------------------------------------------------------------
package engine;

import java.util.List;
import java.util.Map;

//
// TODO: check for duplicates (user-resource combinations)!
/**
 * Facade that forwards a request to the tag, resource or user recommender,
 * depending on the requested {@link EntityType}.
 */
public class EntityRecommenderEngine implements EngineInterface {

    private EngineInterface tagRecommender;
    private EngineInterface resourceRecommender;
    private EngineInterface userRecommender;

    /**
     * Loads the same file into all three recommenders. The engines are only published
     * after all of them loaded successfully, so a failing load leaves the previous state intact.
     */
    @Override
    public void loadFile(String path, String filename) throws Exception {
        EngineInterface newTagRecommender = new TagRecommenderEvalEngine();
        newTagRecommender.loadFile(path, filename);
        EngineInterface newResourceRecommender = new ResourceRecommenderEngine();
        newResourceRecommender.loadFile(path, filename);
        EngineInterface newUserRecommender = new UserRecommenderEngine();
        newUserRecommender.loadFile(path, filename);

        this.tagRecommender = newTagRecommender;
        this.resourceRecommender = newResourceRecommender;
        this.userRecommender = newUserRecommender;
    }

    /**
     * Delegates to the recommender responsible for {@code type}.
     *
     * @return the delegate's result; an empty map if {@code type} is {@code null} or
     *         no file has been loaded yet
     */
    @Override
    public Map<String, Double> getEntitiesWithLikelihood(String user, String resource, List<String> topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) {
        EngineInterface recommender = null;
        if (type == EntityType.TAG) {
            recommender = this.tagRecommender;
        } else if (type == EntityType.RESOURCE) {
            recommender = this.resourceRecommender;
        } else if (type == EntityType.USER) {
            recommender = this.userRecommender;
        }

        if (recommender == null) {
            // Empty instead of null, like the other engines do when they have no answer.
            return new java.util.LinkedHashMap<String, Double>();
        }
        return recommender.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
    }
}

--------------------------------------------------------------------------------
/src/engine/EntityType.java:
--------------------------------------------------------------------------------
package engine;

public enum EntityType {
    TAG,
    RESOURCE,
    USER
}
--------------------------------------------------------------------------------
/src/engine/MostPopularCollectiveEngine.java:
--------------------------------------------------------------------------------
package engine;

import java.util.ArrayList;
import java.util.LinkedHashMap;

import java.util.List; 6 | import java.util.Map; 7 | 8 | import file.BookmarkReader; 9 | 10 | public class MostPopularCollectiveEngine implements EngineInterface { 11 | 12 | private BookmarkReader reader; 13 | private final Map collectiveTags = new LinkedHashMap(); 14 | 15 | public void loadFile(String path, String filename) throws Exception { 16 | BookmarkReader reader = EngineUtils.getSortedBookmarkReader(path, filename); 17 | Map collectiveTags = EngineUtils.calcTopEntities(reader, EntityType.TAG); 18 | 19 | // map to strings 20 | Map collectiveTagNames = new LinkedHashMap(); 21 | for (Map.Entry tag : collectiveTags.entrySet()) { 22 | collectiveTagNames.put(reader.getTags().get(tag.getKey()), tag.getValue()); 23 | } 24 | 25 | resetStructures(reader, collectiveTagNames); 26 | } 27 | 28 | @Override 29 | public synchronized Map getEntitiesWithLikelihood(String user, String resource, List topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) { 30 | if (count == null || count.doubleValue() < 1) { 31 | count = 10; 32 | } 33 | List filterTags = EngineUtils.getFilterTags(filterOwnEntities, this.reader, user, resource); 34 | 35 | Map returnMap = new LinkedHashMap(); 36 | for (Map.Entry entry : collectiveTags.entrySet()) { 37 | if (returnMap.size() < count.intValue()) { 38 | if (!filterTags.contains(entry.getKey())) { 39 | returnMap.put(entry.getKey(), entry.getValue()); 40 | } 41 | } else { 42 | break; 43 | } 44 | } 45 | return returnMap; 46 | } 47 | 48 | private synchronized void resetStructures(BookmarkReader reader, Map collectiveTags) { 49 | this.reader = reader; 50 | this.collectiveTags.clear(); 51 | this.collectiveTags.putAll(collectiveTags); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/engine/TagRecommenderEvalEngine.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate 
algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package engine;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Map;

import file.BookmarkReader;

/**
 * Tag recommender used for evaluation studies: picks one of several engines based on
 * the requested algorithm and appends every request and its result to a log file.
 */
public class TagRecommenderEvalEngine implements EngineInterface {

    private EngineInterface lmEngine;
    private EngineInterface bllEngine;
    private EngineInterface threelEngine;
    private EngineInterface mpEngine;

    //private Random random;
    private BufferedWriter bw;

    public TagRecommenderEvalEngine() {
        this.lmEngine = null;
        this.bllEngine = null;
        this.threelEngine = null;
        this.mpEngine = null;

        //this.random = new Random();
        this.bw = null;
    }

    /**
     * Loads the file into all underlying engines and (re)opens the request log
     * "tagrec_log.txt" in append mode. A failure to open the log only disables logging.
     */
    @Override
    public void loadFile(String path, String filename) throws Exception {
        this.lmEngine = new LanguageModelEngine();
        this.lmEngine.loadFile(path, filename);
        this.bllEngine = new BaseLevelLearningEngine();
        this.bllEngine.loadFile(path, filename);
        this.threelEngine = new ThreeLayersCollectiveEngine();
        this.threelEngine.loadFile(path, filename);
        this.mpEngine = new MostPopularCollectiveEngine();
        this.mpEngine.loadFile(path, filename);

        BufferedWriter newWriter = null;
        try {
            String logFile;
            if (path == null) {
                logFile = "./data/tagrec_log.txt";
            } else {
                logFile = path + "tagrec_log.txt";
            }
            newWriter = new BufferedWriter(new FileWriter(new File(logFile), true));
        } catch (Exception e) {
            e.printStackTrace();
        }
        replaceLogWriter(newWriter);
    }

    /**
     * Answers a request with the engine that belongs to {@code algorithm}:
     * BLLacMPr uses the base-level-learning engine, MPur the language-model engine and
     * THREELcoll the three-layers engine when topics are given. THREELcoll without topics
     * and every other algorithm fall back to the most-popular engine.
     *
     * @return the selected engine's result, or {@code null} if that engine is not loaded
     */
    @Override
    public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, String resource, List<String> topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) {
        EngineInterface engine;
        String requestedAlgorithm;

        if (algorithm == Algorithm.BLLacMPr) {
            engine = this.bllEngine;
            requestedAlgorithm = Algorithm.BLLacMPr.name();
        } else if (algorithm == Algorithm.MPur) {
            engine = this.lmEngine;
            requestedAlgorithm = Algorithm.MPur.name();
        } else if (algorithm == Algorithm.THREELcoll) {
            // Without topics the three-layers model has nothing to work with; the request
            // is still logged under the algorithm that was asked for.
            boolean hasTopics = topics != null && topics.size() > 0;
            engine = hasTopics ? this.threelEngine : this.mpEngine;
            requestedAlgorithm = Algorithm.THREELcoll.name();
        } else {
            engine = this.mpEngine;
            requestedAlgorithm = Algorithm.MP.name();
        }

        Map<String, Double> returnMap = null;
        String algorithmString = null;
        if (engine != null) {
            algorithmString = requestedAlgorithm;
            returnMap = engine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
        }

        /* KnowBrain study
        if (algorithm == null || algorithm == Algorithm.THREELcoll || algorithm == Algorithm.THREEL) {
            if (this.threelEngine != null) {
                returnMap = this.threelEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
                if (algorithm == Algorithm.THREEL) {
                    algorithmString = "3L";
                } else {
                    algorithmString = "3Lcoll";
                }
            }
        } else if (algorithm == Algorithm.BLLcoll || algorithm == Algorithm.BLL) {
            if (this.bllEngine != null) {
                returnMap = this.bllEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
                if (algorithm == Algorithm.BLL) {
                    algorithmString = "BLL";
                } else {
                    algorithmString = "BLLcoll";
                }
            }
        } else {
            if (this.mpEngine != null) {
                returnMap = this.mpEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
                algorithmString = "MP";
            }
        }
        */

        if (this.bw != null) {
            try {
                // A missing result is logged as "null" instead of crashing the request.
                Object recommended = (returnMap == null) ? null : returnMap.keySet();
                this.bw.write(user + "|" + resource + "|" + topics + "|" + count + "|" + filterOwnEntities + "|" + System.currentTimeMillis() + "|" + algorithmString + "|" + recommended + "\n");
                this.bw.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return returnMap;
    }

    public static boolean getRandomBoolean() {
        return Math.random() < 0.5;
    }

    /**
     * Swaps the log writer and closes the previous one, so that repeated loads do not
     * leak file handles. Synchronized because requests write to the log under the same lock.
     */
    private synchronized void replaceLogWriter(BufferedWriter newWriter) {
        BufferedWriter oldWriter = this.bw;
        this.bw = newWriter;
        if (oldWriter != null) {
            try {
                oldWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

--------------------------------------------------------------------------------
/src/engine/ThreeLayersCollectiveEngine.java:
--------------------------------------------------------------------------------
package engine;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import common.CalculationType;
import processing.BLLCalculator;
import processing.ThreeLTCalculator;
import
file.BookmarkReader;

/**
 * Engine that recommends tags with the three-layers calculator, either collectively
 * for a set of topics or for a single user.
 */
public class ThreeLayersCollectiveEngine implements EngineInterface {

    private static final int DEFAULT_COUNT = 10;

    private BookmarkReader reader = null;
    private ThreeLTCalculator calculator = null;

    /**
     * Reads and sorts the bookmark file and builds the calculator on top of it.
     */
    @Override
    public void loadFile(String path, String filename) throws Exception {
        BookmarkReader reader = EngineUtils.getSortedBookmarkReader(path, filename);
        ThreeLTCalculator calculator = new ThreeLTCalculator(reader, reader.getBookmarks().size(), 5, 5, true, false, false, CalculationType.NONE);

        resetStructures(reader, calculator);
    }

    /**
     * Returns up to {@code count} tags for the given topics.
     *
     * @param user user name; required for non-collective algorithms
     * @param resource resource name, used together with {@code user} for filtering
     * @param topics topic names; they are lower-cased before being looked up in the
     *        reader's categories, unknown topics are ignored
     * @param count maximum number of tags; {@code null} or values below 1 mean 10
     * @param filterOwnEntities whether tags the user already assigned to the resource are
     *        left out; {@code null} means {@code true}
     * @param algorithm {@code null} or {@link Algorithm#THREELcoll} selects the collective
     *        ranking, anything else the ranking for {@code user}
     * @param type not used by this engine
     * @return tag names mapped to scores; empty if the user is unknown or no file was loaded
     */
    @Override
    public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, String resource, List<String> topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) {
        int limit = (count == null || count < 1) ? DEFAULT_COUNT : count;
        boolean filterOwn = (filterOwnEntities == null) || filterOwnEntities;

        if (this.reader == null || this.calculator == null) {
            // Queried before loadFile(): answer with nothing rather than a NullPointerException.
            return new LinkedHashMap<String, Double>();
        }
        List<Integer> filterTags = EngineUtils.getFilterTags(filterOwn, this.reader, user, resource);

        List<Integer> topicIDs = new ArrayList<>();
        if (topics != null) {
            List<String> categories = this.reader.getCategories();
            for (String t : topics) {
                int tID = categories.indexOf(t.toLowerCase());
                if (tID != -1) {
                    topicIDs.add(tID);
                }
            }
        }

        Map<Integer, Double> tagIDs;
        if (algorithm == null || algorithm == Algorithm.THREELcoll) {
            tagIDs = this.calculator.getCollectiveRankedTagList(topicIDs, System.currentTimeMillis() / 1000.0, limit, false, false);
        } else {
            int userID = this.reader.getUsers().indexOf(user);
            if (user == null || userID == -1) {
                return new LinkedHashMap<String, Double>();
            }
            tagIDs = this.calculator.getRankedTagList(userID, -1, topicIDs, System.currentTimeMillis() / 1000.0, limit, false, false, true);
        }

        // map to strings
        Map<String, Double> tagStrings = new LinkedHashMap<String, Double>();
        for (Map.Entry<Integer, Double> entry : tagIDs.entrySet()) {
            if (tagStrings.size() >= limit) {
                break;
            }
            if (!filterTags.contains(entry.getKey())) {
                tagStrings.put(this.reader.getTags().get(entry.getKey()), entry.getValue());
            }
        }
        return tagStrings;
    }

    private synchronized void resetStructures(BookmarkReader reader, ThreeLTCalculator calculator) {
        this.reader = reader;
        this.calculator = calculator;
    }
}

--------------------------------------------------------------------------------
/src/engine/ThreeLayersEngine.java:
--------------------------------------------------------------------------------
/*
TagRecommender:
A framework to implement and evaluate algorithms for the recommendation
of tags.
Copyright (C) 2013 Dominik Kowald

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package engine;

import processing.ThreeLTCalculator;
import file.BookmarkReader;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import common.CalculationType;
import common.DoubleMapComparator;

// TODO: make it work in online setting!
(caching + LDA topic calculation) 37 | public class ThreeLayersEngine implements EngineInterface { 38 | 39 | private BookmarkReader reader = null; 40 | private ThreeLTCalculator calculator = null; 41 | private final Map topTags; 42 | 43 | public ThreeLayersEngine() { 44 | this.topTags = new LinkedHashMap<>(); 45 | this.reader = null; 46 | } 47 | 48 | public void loadFile(String path, String filename) throws Exception { 49 | BookmarkReader reader = EngineUtils.getSortedBookmarkReader(path, filename); 50 | 51 | ThreeLTCalculator calculator = new ThreeLTCalculator(reader, reader.getBookmarks().size(), 5, 5, true, true, false, CalculationType.NONE); 52 | Map topTags = EngineUtils.calcTopEntities(reader, EntityType.TAG); 53 | resetStructure(reader, calculator, topTags); 54 | } 55 | 56 | public synchronized Map getEntitiesWithLikelihood(String user, String resource, List topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) { 57 | if (count == null || count.doubleValue() < 1) { 58 | count = 10; 59 | } 60 | if (filterOwnEntities == null) { 61 | filterOwnEntities = true; 62 | } 63 | List filterTags = new ArrayList(); 64 | 65 | Map tagIDs = new LinkedHashMap<>(); 66 | Map tagMap = new LinkedHashMap<>(); 67 | if (this.reader == null || this.calculator == null) { 68 | return tagMap; 69 | } 70 | if (algorithm == null || algorithm != Algorithm.MP) { 71 | int userID = -1; 72 | if (user != null) { 73 | userID = this.reader.getUsers().indexOf(user); 74 | } 75 | filterTags = EngineUtils.getFilterTags(filterOwnEntities, this.reader, user, resource/*, this.calculator.getUserMaps().get(userID)*/); 76 | int resID = -1; 77 | if (resource != null) { 78 | resID = this.reader.getResources().indexOf(resource); 79 | } 80 | List topicIDs = new ArrayList<>(); 81 | if (topics != null) { 82 | for (String t : topics) { 83 | int tID = this.reader.getCategories().indexOf(t); 84 | if (tID != -1) { 85 | topicIDs.add(tID); 86 | } 87 | } 88 | } 89 | if (algorithm == 
null || algorithm == Algorithm.THREELTMPr) { 90 | tagIDs = this.calculator.getRankedTagList(userID, resID, topicIDs, System.currentTimeMillis() / 1000.0, count, this.reader.hasTimestamp(), false, false); // not sorted 91 | } else if (algorithm == Algorithm.THREELT) { 92 | tagIDs = this.calculator.getRankedTagList(userID, -1, topicIDs, System.currentTimeMillis() / 1000.0, count, this.reader.hasTimestamp(), false, false); // not sorted 93 | } else if (algorithm == Algorithm.THREEL) { 94 | tagIDs = this.calculator.getRankedTagList(userID, -1, topicIDs, System.currentTimeMillis() / 1000.0, count, false, false, false); // not sorted 95 | } 96 | } 97 | 98 | // TODO: finish filtering 99 | 100 | // fill up with MP tags 101 | if (tagIDs.size() < count) { 102 | for (Map.Entry t : this.topTags.entrySet()) { 103 | if (tagIDs.size() < count) { 104 | if (!tagIDs.containsKey(t.getKey())) { 105 | tagIDs.put(t.getKey(), t.getValue()); 106 | } 107 | } else { 108 | break; 109 | } 110 | } 111 | } 112 | 113 | // sort 114 | Map sortedResultMap = new TreeMap(new DoubleMapComparator(tagIDs)); 115 | sortedResultMap.putAll(tagIDs); 116 | // map tag-IDs back to strings 117 | for (Map.Entry tEntry : sortedResultMap.entrySet()) { 118 | if (tagMap.size() < count) { 119 | tagMap.put(this.reader.getTags().get(tEntry.getKey()), tEntry.getValue()); 120 | } 121 | } 122 | return tagMap; 123 | } 124 | 125 | public synchronized void resetStructure(BookmarkReader reader, ThreeLTCalculator calculator, Map topTags) { 126 | this.reader = reader; 127 | this.calculator = calculator; 128 | 129 | this.topTags.clear(); 130 | this.topTags.putAll(topTags); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/engine/UserRecommenderEngine.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 
Copyright (C) 2013 Dominik Kowald

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package engine;

import file.BookmarkReader;
import itemrecommendations.CFResourceCalculator;

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import common.Bookmark;
import common.DoubleMapComparator;
import common.Features;
import common.Similarity;

// TODO: cache values
/**
 * Engine that recommends users, based on collaborative-filtering calculators, and
 * fills up short result lists with the most active users.
 */
public class UserRecommenderEngine implements EngineInterface {

    private static final int DEFAULT_COUNT = 10;

    private BookmarkReader reader = null;
    private CFResourceCalculator calculator = null;
    private CFResourceCalculator tagCalculator = null;
    private CFResourceCalculator cbCalculator = null;
    private final Map<Integer, Double> topUsers;

    public UserRecommenderEngine() {
        this.topUsers = new LinkedHashMap<Integer, Double>();
        this.reader = new BookmarkReader(0, false);
    }

    /**
     * Reads and sorts the bookmark file, builds the three calculators and pre-computes
     * the most active users.
     */
    @Override
    public void loadFile(String path, String filename) throws Exception {
        BookmarkReader reader = EngineUtils.getSortedBookmarkReader(path, filename);

        CFResourceCalculator calculator = new CFResourceCalculator(reader, reader.getBookmarks().size(), false, true, false, 5, Similarity.COSINE, Features.ENTITIES);
        CFResourceCalculator tagCalculator = new CFResourceCalculator(reader, reader.getBookmarks().size(), false, true, false, 5, Similarity.COSINE, Features.TAGS);
        CFResourceCalculator cbCalculator = new CFResourceCalculator(reader, reader.getBookmarks().size(), false, false, true, 5, Similarity.COSINE, Features.TAGS);

        Map<Integer, Double> topUsers = EngineUtils.calcTopEntities(reader, EntityType.USER);
        resetStructure(reader, calculator, tagCalculator, cbCalculator, topUsers);
    }

    /**
     * Returns up to {@code count} users for the given user or resource.
     *
     * @param user user name, may be {@code null}
     * @param resource resource name, may be {@code null}
     * @param topics not used by this engine
     * @param count maximum number of users; {@code null} or values below 1 mean 10
     * @param filterOwnEntities passed on to the calculators, and used to keep users that
     *        already have the resource out of the fill-up; {@code null} means {@code true}
     * @param algorithm USERMP yields only the most active users, USERTAGCF uses the
     *        tag-based calculator, everything else the entity-based one
     * @param type not used by this engine
     * @return user names mapped to scores; empty if no file was loaded
     */
    @Override
    public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, String resource, List<String> topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) {
        int limit = (count == null || count < 1) ? DEFAULT_COUNT : count;
        boolean filterOwn = (filterOwnEntities == null) || filterOwnEntities;

        Map<String, Double> userMap = new LinkedHashMap<>();
        if (this.reader == null || this.calculator == null) {
            System.out.println("No data has been loaded");
            return userMap;
        }

        int userID = (user != null) ? this.reader.getUsers().indexOf(user) : -1;
        int resID = (resource != null) ? this.reader.getResources().indexOf(resource) : -1;

        // used to filter already assigned users
        List<Integer> resourceUsers = null;
        if (resID != -1 && filterOwn) {
            resourceUsers = Bookmark.getUsersFromResource(this.reader.getBookmarks(), resID);
        }

        Map<Integer, Double> rankedUsers = null; // not sorted
        if (algorithm != Algorithm.USERMP) {
            if (userID != -1) {
                if (algorithm == Algorithm.USERTAGCF) {
                    rankedUsers = this.tagCalculator.getRankedResourcesList(userID, -1, false, false, false, filterOwn, true);
                } else {
                    rankedUsers = this.calculator.getRankedResourcesList(userID, -1, false, false, false, filterOwn, true);
                }
            } else if (resID != -1) {
                rankedUsers = this.cbCalculator.getRankedResourcesList(-1, resID, false, false, false, filterOwn, true);
            }
        }

        // Own map: the calculator's result is copied, so the fill-up below cannot
        // write into a map that the calculator might still hold on to.
        Map<Integer, Double> userIDs = new LinkedHashMap<>();
        if (rankedUsers != null) {
            userIDs.putAll(rankedUsers);
        }

        // then call MP if necessary
        for (Map.Entry<Integer, Double> t : this.topUsers.entrySet()) {
            if (userIDs.size() >= limit) {
                break;
            }
            // add MP users if they are not already in the recommended list
            boolean alreadyAssigned = resourceUsers != null && resourceUsers.contains(t.getKey());
            if (!userIDs.containsKey(t.getKey()) && !alreadyAssigned) {
                userIDs.put(t.getKey(), t.getValue());
            }
        }

        // sort
        Map<Integer, Double> sortedResultMap = new TreeMap<Integer, Double>(new DoubleMapComparator(userIDs));
        sortedResultMap.putAll(userIDs);

        // last map IDs back to strings
        for (Map.Entry<Integer, Double> tEntry : sortedResultMap.entrySet()) {
            if (userMap.size() >= limit) {
                break;
            }
            userMap.put(this.reader.getUsers().get(tEntry.getKey()), tEntry.getValue());
        }

        return userMap;
    }

    public synchronized void resetStructure(BookmarkReader reader, CFResourceCalculator calculator, CFResourceCalculator tagCalculator, CFResourceCalculator cbCalculator, Map<Integer, Double> topUsers) {
        this.reader = reader;
        this.calculator = calculator;
        this.tagCalculator = tagCalculator;
        this.cbCalculator = cbCalculator;

        this.topUsers.clear();
        this.topUsers.putAll(topUsers);
    }
}

--------------------------------------------------------------------------------
/src/file/BookmarkWriter.java:
--------------------------------------------------------------------------------
package file;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import
java.io.OutputStreamWriter; 11 | import java.util.List; 12 | 13 | import com.google.common.primitives.Ints; 14 | 15 | import common.Bookmark; 16 | 17 | public class BookmarkWriter { 18 | 19 | public static boolean writeSample(BookmarkReader reader, List userSample, String filename, List catPredictions, boolean realValues) { 20 | return doWriteSample(reader, userSample, null, filename, catPredictions, realValues); 21 | } 22 | 23 | public static boolean writeSample(BookmarkReader reader, List userSample, String path, String filename, List catPredictions, boolean realValues) { 24 | return doWriteSample(reader, userSample, path, filename, catPredictions, realValues); 25 | } 26 | 27 | private static boolean doWriteSample(BookmarkReader reader, List userSample, String path, String filename, List catPredictions, boolean realValues) { 28 | try { 29 | String filePath = ""; 30 | if (path == null) { 31 | filePath = "./data/csv/" + filename + ".txt"; 32 | } else { 33 | filePath = path + filename; 34 | } 35 | //FileWriter writer = new FileWriter(new File(filePath)); 36 | OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(new File(filePath)), "UTF8"); 37 | BufferedWriter bw = new BufferedWriter(writer); 38 | int userCount = 0; 39 | // TODO: check encoding 40 | for (Bookmark bookmark : userSample) { 41 | String user = (realValues ? reader.getUsers().get(bookmark.getUserID()).replace("\"", "") : Integer.toString(bookmark.getUserID())); 42 | String resource = (realValues ? reader.getResources().get(bookmark.getResourceID()).replace("\"", "") : Integer.toString(bookmark.getResourceID())); 43 | bw.write("\"" + user + "\";"); 44 | bw.write("\"" + resource + "\";"); 45 | bw.write("\"" + bookmark.getTimestamp().replace("\"", "") + "\";\""); 46 | int i = 0; 47 | for (int t : bookmark.getTags()) { 48 | String tag = (realValues ? 
/**
 * Writes prediction maps to disk using Java object serialization.
 */
public class ResultSerializer {

    /**
     * Serializes {@code predictions} into the file at {@code filePath}, replacing
     * any existing file.
     *
     * This is a best-effort operation: any failure (unwritable path, non-serializable
     * content, {@code null} path) is printed to stderr and not propagated.
     * All streams are closed on both the success and the failure path.
     *
     * @param predictions the map to serialize; it and everything it contains must be serializable
     * @param filePath    destination file path
     */
    public static void serializePredictions(Map<?, ?> predictions, String filePath) {
        // each layer is its own resource so the file handle is released even if a wrapper constructor fails
        try (OutputStream file = new FileOutputStream(filePath);
                OutputStream buffer = new BufferedOutputStream(file);
                ObjectOutput output = new ObjectOutputStream(buffer)) {
            output.writeObject(predictions);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
split value: " + this.splitValue); 53 | 54 | this.catDescMap = new LinkedHashMap(); 55 | int i = 0; 56 | for (Map.Entry entry : sortedTrrMap.entrySet()) { 57 | if (i++ < splitSize) { 58 | this.catDescMap.put(entry.getKey(), true); 59 | } else { 60 | this.catDescMap.put(entry.getKey(), false); 61 | } 62 | //System.out.println(entry.getKey() + " " + entry.getValue()); 63 | } 64 | } 65 | 66 | public void setDescriber(boolean categorizer) { 67 | this.describer = categorizer; 68 | } 69 | 70 | public boolean getDescriber() { 71 | return this.describer; 72 | } 73 | 74 | public boolean evaluate(int userID) { 75 | Boolean describer = this.isDescriber(userID); 76 | if (describer != null) { 77 | return (describer.booleanValue() == this.describer); 78 | } 79 | return false; 80 | } 81 | 82 | // IMPORTANT: could be null! then do not evaluate! 83 | private Boolean isDescriber(int userID) { 84 | //return (this.trrMap.get(userID) > this.splitValue); 85 | if (this.catDescMap.containsKey(userID)) { 86 | return this.catDescMap.get(userID); 87 | } 88 | return null; 89 | } 90 | 91 | private double getTRR(int userID) { 92 | if (userID < this.userMaps.size() && userID < this.userResourceLists.size()) { 93 | double trr = (double)this.userMaps.get(userID).keySet().size() / (double)this.userResourceLists.get(userID).size(); 94 | return trr; 95 | } 96 | return 0.0; // TODO: check if null should be returned 97 | } 98 | 99 | private double getTPP(int userID) { 100 | double tpp = Utilities.getMapCount(this.userMaps.get(userID)) / (double)this.userResourceLists.get(userID).size(); 101 | return tpp; 102 | } 103 | 104 | private double getOrphanRatio(int userID) { 105 | Map userMap = this.userMaps.get(userID); 106 | int n = (int)Math.ceil((double)Collections.max(userMap.values()) / 100.0); 107 | int count = 0; 108 | for (int val : userMap.values()) { 109 | if (val <= n) { 110 | count++; 111 | } 112 | } 113 | return (double)count / (double)userMap.size(); 114 | } 115 | 116 | // Statics 
/**
 * Converts a pipe-separated CiteULike dump into the project's semicolon-separated
 * bookmark format, merging consecutive rows of the same resource/user pair into
 * one output line.
 */
public class CiteULikeProcessor {

    private static final String DATA_DIR = "./data/csv/cul_core/";

    /**
     * Reads {@code ./data/csv/cul_core/<inputFile>} and writes
     * {@code ./data/csv/cul_core/<outputFile>.txt}.
     *
     * Each input row is split on {@code |} into resource, user, timestamp and tag.
     * Rows with fewer than four columns, or with an empty or system-generated tag,
     * are skipped. Consecutive accepted rows with the same resource and user
     * become one output line.
     *
     * @param inputFile  name of the input file inside the data directory
     * @param outputFile name of the output file (without {@code .txt}) inside the data directory
     * @return {@code true} if the whole file was converted, {@code false} if any error
     *         occurred (the error is printed to stderr)
     */
    public static boolean processFile(String inputFile, String outputFile) {
        try (BufferedReader br = new BufferedReader(new FileReader(new File(DATA_DIR + inputFile)));
                BufferedWriter bw = new BufferedWriter(new FileWriter(new File(DATA_DIR + outputFile + ".txt")))) {
            String resID = "", userHash = "", timestamp = "";
            List<String> tags = new ArrayList<String>();
            String line;

            while ((line = br.readLine()) != null) {
                // limit -1 keeps a trailing empty tag column, so it is filtered below instead of
                // shortening the array and throwing on lineParts[3]
                String[] lineParts = line.split("\\|", -1);
                if (lineParts.length < 4) {
                    continue;
                }
                String tag = lineParts[3];
                if (!isUsableTag(tag)) {
                    continue;
                }
                boolean havePrevious = !resID.isEmpty() && !userHash.isEmpty();
                boolean pairChanged = !resID.equals(lineParts[0]) || !userHash.equals(lineParts[1]);
                if (havePrevious && pairChanged) {
                    writeLine(bw, resID, userHash, timestamp, tags);
                    tags.clear();
                }
                resID = lineParts[0];
                userHash = lineParts[1];
                timestamp = lineParts[2];
                tags.add(tag);
            }
            // nothing pending when the input was empty or fully filtered
            if (!tags.isEmpty()) {
                writeLine(bw, resID, userHash, timestamp, tags);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /** Returns {@code false} for empty tags and for the import/export/system markers this converter drops. */
    private static boolean isUsableTag(String tag) {
        return !tag.isEmpty()
                && !tag.equals("no-tag")
                && !tag.contains("-import")
                && !tag.contains("-export")
                && !tag.contains("sys:")
                && !tag.contains("system:")
                && !tag.contains("imported");
    }

    /**
     * Writes one output line: user, resource, timestamp in seconds, comma-joined tags
     * and an empty fifth column, each wrapped in double quotes.
     *
     * @throws IOException if the write fails; propagated so {@link #processFile} reports failure
     */
    private static void writeLine(BufferedWriter bw, String resID, String userHash, String timestamp, List<String> tags) throws IOException {
        StringBuilder tagString = new StringBuilder();
        boolean first = true;
        for (String tag : tags) {
            if (!first) {
                tagString.append(',');
            }
            tagString.append(tag);
            first = false;
        }
        bw.write("\"" + userHash + "\";\"" + resID + "\";\"" + processTimestamp(timestamp) + "\";\"" + tagString + "\";\"\"\n");
    }

    /**
     * Converts a timestamp string to seconds.
     *
     * Everything from the last {@code +} onwards is cut off before parsing; the cut-off
     * part is discarded, not applied. A value without {@code +} is parsed as-is.
     * NOTE(review): a suffix introduced by {@code -} is not stripped; confirm whether the data contains such values.
     *
     * @throws IllegalArgumentException if the remaining text is not in the format
     *         accepted by {@link Timestamp#valueOf(String)}
     */
    private static long processTimestamp(String timestamp) {
        int offsetStart = timestamp.lastIndexOf('+');
        String withoutOffset = (offsetStart >= 0) ? timestamp.substring(0, offsetStart) : timestamp;
        return Timestamp.valueOf(withoutOffset).getTime() / 1000; // because of seconds
    }
}
19 | */ 20 | package file.preprocessing; 21 | 22 | import java.util.ArrayList; 23 | import java.util.HashSet; 24 | 25 | import common.Bookmark; 26 | 27 | import file.BookmarkReader; 28 | 29 | public class CoreFiltering { 30 | 31 | private BookmarkReader reader; 32 | 33 | public CoreFiltering(BookmarkReader reader) { 34 | this.reader = reader; 35 | } 36 | 37 | public BookmarkReader filterOrphansIterative(int level) { 38 | return filterOrphansIterative(level, level, level); 39 | } 40 | 41 | public BookmarkReader filterOrphansIterative(int userLevel, int resLevel, int tagLevel) { 42 | HashSet userIDs = new HashSet(); 43 | for (int i = 0; i < this.reader.getUsers().size(); i++) { 44 | int count = this.reader.getUserCounts().get(i); 45 | if (count >= userLevel) { 46 | userIDs.add(i); 47 | } 48 | } 49 | System.out.println("User IDs determined ..."); 50 | 51 | HashSet resIDs = new HashSet(); 52 | for (int i = 0; i < this.reader.getResources().size(); i++) { 53 | int count = this.reader.getResourceCounts().get(i); 54 | if (count >= resLevel) { 55 | resIDs.add(i); 56 | } 57 | } 58 | System.out.println("Res IDs determined ..."); 59 | 60 | HashSet tagIDs = new HashSet(); 61 | if (tagLevel > 0) { 62 | for (int i = 0; i < this.reader.getTags().size(); i++) { 63 | int count = this.reader.getTagCounts().get(i); 64 | if (count >= tagLevel) { 65 | tagIDs.add(i); 66 | } 67 | } 68 | System.out.println("Tag IDs determined ..."); 69 | } 70 | 71 | System.out.println("Start removing ..."); 72 | ArrayList keepData = new ArrayList(); 73 | for (Bookmark data : this.reader.getBookmarks()) { 74 | int resID = data.getResourceID(); 75 | int userID = data.getUserID(); 76 | if (resIDs.contains(resID) && userIDs.contains(userID)) { 77 | if (tagLevel > 0) { 78 | ArrayList tags = new ArrayList(); 79 | for (Integer tag : data.getTags()) { 80 | if (tagIDs.contains(tag)) { 81 | tags.add(tag); 82 | } 83 | } 84 | if (tags.size() > 0) { 85 | data.setTags(tags); 86 | keepData.add(data); 87 | } 88 | } else 
/**
 * Extracts, from the listening-events file at {@code EVENTS_FILE}, all rows that
 * belong to the users named in a filter file.
 */
public class JKULFMProcessor {

    private static final String EVENTS_FILE = "./data/schedl/LFM-1b_LEs.txt";

    /**
     * Reads the user IDs from a comma-separated filter file.
     *
     * The first line is treated as a header and skipped. The user ID is the text
     * before the first comma. Lines without a comma (e.g. blank lines) are ignored.
     *
     * @param filterFile path of the filter file, read as UTF-8
     * @return the set of user IDs found
     * @throws IOException if the file cannot be read
     */
    private static Set<String> getFilterUsers(String filterFile) throws IOException {
        Set<String> filterUsers = new HashSet<String>();
        int rows = 0;
        try (FileInputStream in = new FileInputStream(new File(filterFile));
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF8"))) {
            boolean header = true;
            String line;
            while ((line = br.readLine()) != null) {
                if (header) {
                    header = false;
                    continue;
                }
                int comma = line.indexOf(',');
                if (comma < 0) {
                    continue; // no ID column to extract
                }
                filterUsers.add(line.substring(0, comma));
                rows++;
            }
        }
        System.out.println("Number of users: " + rows);
        return filterUsers;
    }

    /**
     * Collects every row of the events file whose first tab-separated column is
     * contained in {@code filterUsers}. Rows without a tab are ignored.
     *
     * @param filterUsers user IDs to keep
     * @return matching rows in file order
     * @throws IOException if the events file cannot be read
     */
    private static List<String> getFilterLines(Set<String> filterUsers) throws IOException {
        List<String> filterLines = new ArrayList<String>();
        try (FileInputStream in = new FileInputStream(new File(EVENTS_FILE));
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                int tab = line.indexOf('\t');
                if (tab < 0) {
                    continue; // no user column to compare
                }
                if (filterUsers.contains(line.substring(0, tab))) {
                    filterLines.add(line);
                }
            }
        }
        System.out.println("Number of lines: " + filterLines.size());
        return filterLines;
    }

    /**
     * Writes the given rows to {@code outputFile} as UTF-8, one per line.
     *
     * @throws IOException if the file cannot be written
     */
    private static void writeOutputFile(String outputFile, List<String> lines) throws IOException {
        int written = 0;
        try (FileOutputStream out = new FileOutputStream(new File(outputFile));
                BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, "UTF8"))) {
            for (String l : lines) {
                bw.write(l + '\n');
                written++;
            }
        }
        System.out.println("Written lines: " + written);
    }

    /**
     * Runs the three steps: read filter users, filter the events file, write the result.
     * Errors are printed to stderr and not propagated; in that case the output file
     * may be missing or incomplete.
     *
     * @param filterFile path of the comma-separated user file (with header line)
     * @param outputFile path of the file to create
     */
    public static void preprocessFile(String filterFile, String outputFile) {
        try {
            // get filter users
            Set<String> filterUsers = getFilterUsers(filterFile);

            // read big file and filter user actions
            List<String> filterLines = getFilterLines(filterUsers);

            // write file
            writeOutputFile(outputFile, filterLines);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
// used for LastFm and Delicious (small)
// userID bookmarkID tagID timestamp
/**
 * Converts a tab-separated {@code userID / bookmarkID / tagID / timestamp} file into
 * the project's semicolon-separated bookmark format, resolving tag IDs to names
 * through {@code tags.dat}.
 */
public class LastFMProcessor {

    private static final String DATA_DIR = "./data/csv/lastfm_core/";

    /**
     * Reads {@code ./data/csv/lastfm_core/<inputFile>} and writes
     * {@code ./data/csv/lastfm_core/<outputFile>.txt}.
     *
     * The first input line is skipped as a header. Rows with fewer than four columns
     * are skipped. Consecutive rows with the same bookmark and user become one output
     * line. Tag IDs that are not found in {@code tags.dat} are dropped, but the
     * bookmark line itself is still written.
     *
     * @param inputFile  name of the input file inside the data directory
     * @param outputFile name of the output file (without {@code .txt}) inside the data directory
     * @return {@code true} if the whole file was converted, {@code false} if any error
     *         occurred (the error is printed to stderr)
     */
    public static boolean processFile(String inputFile, String outputFile) {
        try (BufferedReader br = new BufferedReader(new FileReader(new File(DATA_DIR + inputFile)));
                BufferedWriter bw = new BufferedWriter(new FileWriter(new File(DATA_DIR + outputFile + ".txt")))) {
            // load the ID -> name table once instead of rescanning tags.dat for every row
            java.util.Map<String, String> tagNames = loadNames("tags.dat");

            String resID = "", userHash = "", timestamp = "";
            List<String> tags = new ArrayList<String>();
            boolean headerSkipped = false;
            boolean pending = false; // true once at least one data row has been read
            String line;

            while ((line = br.readLine()) != null) {
                if (!headerSkipped) { // skip first line
                    headerSkipped = true;
                    continue;
                }
                String[] lineParts = line.split("\t");
                if (lineParts.length < 4) {
                    continue;
                }
                boolean havePrevious = !resID.isEmpty() && !userHash.isEmpty();
                boolean pairChanged = !resID.equals(lineParts[1]) || !userHash.equals(lineParts[0]);
                if (havePrevious && pairChanged) {
                    writeLine(bw, resID, userHash, timestamp, tags);
                    tags.clear();
                }
                userHash = lineParts[0];
                resID = lineParts[1];
                timestamp = lineParts[3];
                String tagName = tagNames.get(lineParts[2]);
                if (tagName != null) {
                    tags.add(tagName);
                }
                pending = true;
            }
            // a header-only or empty input has nothing to flush
            if (pending) {
                writeLine(bw, resID, userHash, timestamp, tags);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Writes one output line: user, resource, timestamp in seconds, comma-joined tags
     * and an empty fifth column, each wrapped in double quotes.
     *
     * @throws IOException if the write fails; propagated so {@link #processFile} reports failure
     */
    private static void writeLine(BufferedWriter bw, String resID, String userHash, String timestamp, List<String> tags) throws IOException {
        StringBuilder tagString = new StringBuilder();
        boolean first = true;
        for (String tag : tags) {
            if (!first) {
                tagString.append(',');
            }
            tagString.append(tag);
            first = false;
        }
        bw.write("\"" + userHash + "\";\"" + resID + "\";\"" + processTimestamp(timestamp) + "\";\"" + tagString + "\";\"\"\n");
    }

    /**
     * Parses the timestamp as a {@code long} and divides it by 1000.
     *
     * @throws NumberFormatException if the text is not a valid {@code long}
     */
    private static long processTimestamp(String timestamp) {
        return Long.parseLong(timestamp) / 1000; // because of seconds
    }

    /**
     * Loads a tab-separated {@code id / name} file from the data directory into a map.
     * When an ID occurs more than once the first occurrence wins. Lines with fewer
     * than two columns are ignored. If the file cannot be read, the error is printed
     * and whatever was read so far (possibly nothing) is returned.
     */
    private static java.util.Map<String, String> loadNames(String file) {
        java.util.Map<String, String> names = new java.util.HashMap<String, String>();
        try (BufferedReader br = new BufferedReader(new FileReader(new File(DATA_DIR + file)))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] lineParts = line.split("\t");
                if (lineParts.length >= 2 && !names.containsKey(lineParts[0])) {
                    names.put(lineParts[0], lineParts[1]);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return names;
    }
}
BufferedReader br = new BufferedReader(reader); 24 | String line = null; 25 | while ((line = br.readLine()) != null) { 26 | String[] lineParts = line.split("::"); 27 | ratingMap.put(lineParts[0] + "_" + lineParts[1], lineParts[2]); 28 | } 29 | br.close(); 30 | } 31 | 32 | 33 | FileReader reader = new FileReader(new File("./data/csv/ml_core/" + inputFile)); 34 | FileWriter writer = new FileWriter(new File("./data/csv/ml_core/" + outputFile + ".txt")); 35 | BufferedReader br = new BufferedReader(reader); 36 | BufferedWriter bw = new BufferedWriter(writer); 37 | String line = null; 38 | String resID = "", userHash = "", timestamp = ""; 39 | List tags = new ArrayList(); 40 | 41 | while ((line = br.readLine()) != null) { 42 | String[] lineParts = line.split("::"); 43 | String tag = lineParts[2]; 44 | //if (!filter || (!tag.contains("no-tag") && !tag.contains("-import"))) { 45 | if (!resID.isEmpty() && !userHash.isEmpty() && (!resID.equals(lineParts[1]) || !userHash.equals(lineParts[0]))) { 46 | writeLine(bw, resID, userHash, timestamp, tags, ratingMap); 47 | tags.clear(); 48 | } 49 | resID = lineParts[1]; 50 | userHash = lineParts[0]; 51 | timestamp = lineParts[3]; 52 | tags.add(tag); 53 | //} 54 | } 55 | writeLine(bw, resID, userHash, timestamp, tags, ratingMap); 56 | 57 | br.close(); 58 | bw.flush(); 59 | bw.close(); 60 | return true; 61 | } catch (Exception e) { 62 | e.printStackTrace(); 63 | } 64 | return false; 65 | } 66 | 67 | private static boolean writeLine(BufferedWriter bw, String resID, String userHash, String timestamp, List tags, Map ratingMap) { 68 | try { 69 | String tagString = ""; 70 | for (String tag : tags) { 71 | tagString += (tag + ","); 72 | } 73 | tagString = tagString.length() > 0 ? 
/**
 * Converts a tab-separated {@code timestamp / user / resource / tag} file into the
 * project's semicolon-separated bookmark format, grouping all tags of one
 * user/resource pair into a single output line.
 */
public class PintsProcessor {

    /** One user/resource pair with the timestamp of its first accepted row and its tags in first-seen order. */
    private static final class Post {
        final String userHash;
        final String resID;
        final String timestamp;
        final Set<String> tags = new LinkedHashSet<String>();

        Post(String userHash, String resID, String timestamp) {
            this.userHash = userHash;
            this.resID = resID;
            this.timestamp = timestamp;
        }
    }

    // used for datasets from PINTS: Delicious (big) and Flickr
    /**
     * Reads {@code ./data/csv/<dir>/<inputFile>} and writes
     * {@code ./data/csv/<dir>/<outputFile>.txt}.
     *
     * Rows with fewer than four columns are skipped. Tags are lower-cased; empty and
     * system-generated tags are dropped. Output lines appear in the order in which
     * each user/resource pair was first seen with an accepted tag.
     *
     * @param dir        sub-directory of {@code ./data/csv/}
     * @param inputFile  name of the input file inside that directory
     * @param outputFile name of the output file (without {@code .txt}) inside that directory
     * @return {@code true} if the whole file was converted, {@code false} if any error
     *         occurred (the error is printed to stderr)
     */
    public static boolean processFile(String dir, String inputFile, String outputFile) {
        String baseDir = "./data/csv/" + dir + "/";
        Map<String, Post> posts = new LinkedHashMap<String, Post>();

        try (BufferedReader br = new BufferedReader(new FileReader(new File(baseDir + inputFile)));
                BufferedWriter bw = new BufferedWriter(new FileWriter(new File(baseDir + outputFile + ".txt")))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] lineParts = line.split("\t");
                if (lineParts.length < 4) {
                    continue;
                }
                String tag = lineParts[3].toLowerCase(java.util.Locale.ROOT);
                if (isIgnoredTag(tag)) {
                    continue;
                }
                String userHash = lineParts[1];
                String resID = lineParts[2];
                // the row was split on tab, so a tab cannot occur inside either field:
                // the key is unambiguous and is never parsed back
                String key = userHash + "\t" + resID;
                Post post = posts.get(key);
                if (post == null) {
                    post = new Post(userHash, resID, lineParts[0]);
                    posts.put(key, post);
                    if (posts.size() % 100000 == 0) {
                        System.out.println("READ 100000 bookmarks");
                    }
                }
                post.tags.add(tag);
            }

            int written = 0;
            for (Post post : posts.values()) {
                written++;
                writeLine(bw, post.resID, post.userHash, post.timestamp, post.tags);
                if (written % 100000 == 0) {
                    System.out.println("WROTE 100000 bookmarks");
                }
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /** Returns {@code true} for empty tags and for the import/export/system markers this converter drops. */
    private static boolean isIgnoredTag(String tag) {
        return tag.isEmpty()
                || tag.equals("no-tag")
                || tag.contains("-import")
                || tag.contains("-export")
                || tag.contains("sys:")
                || tag.contains("system:")
                || tag.contains("imported");
    }

    /**
     * Writes one output line: user, resource, timestamp in seconds, comma-joined tags
     * and an empty fifth column, each wrapped in double quotes. Nothing is written
     * when {@code tags} is empty.
     *
     * @throws IOException if the write fails; propagated so {@link #processFile} reports failure
     */
    private static void writeLine(BufferedWriter bw, String resID, String userHash, String timestamp, Set<String> tags) throws IOException {
        if (tags.isEmpty()) {
            return;
        }
        StringBuilder tagString = new StringBuilder();
        boolean first = true;
        for (String tag : tags) {
            if (!first) {
                tagString.append(',');
            }
            tagString.append(tag);
            first = false;
        }
        bw.write("\"" + userHash + "\";\"" + resID + "\";\"" + processTimestamp(timestamp) + "\";\"" + tagString + "\";\"\"\n");
    }

    /**
     * Parses the text with {@link Timestamp#valueOf(String)} and returns the value in seconds.
     *
     * @throws IllegalArgumentException if the text is not in the format accepted by {@code Timestamp.valueOf}
     */
    private static long processTimestamp(String timestamp) {
        return Timestamp.valueOf(timestamp).getTime() / 1000; // because of seconds
    }
}
"_tensor" : "_mymedialite"); 29 | String outputFilename = filename.split("_")[0]; 30 | // train file 31 | // TODO: reader 32 | createFile(trainList, "./data/csv/" + outputFilename + "_train" + name + ".txt", null, false, tagRec, minBookmarks, maxBookmarks, null); 33 | // test file 34 | String suffix = ""; 35 | if (filter != null) { 36 | suffix += ("_" + (filter.getDescriber() ? "desc" : "cat")); 37 | } 38 | createFile(testList, "./data/csv/" + outputFilename + suffix + "_test" + name + ".txt", null, true, tagRec, minBookmarks, maxBookmarks, filter); 39 | } 40 | 41 | private static void createFile(List list, String filename, BookmarkReader reader, boolean testset, boolean tagRec, Integer minBookmarks, Integer maxBookmarks, CatDescFiltering filter) { 42 | try { 43 | File tempFile = new File(filename); 44 | BufferedWriter bw = new BufferedWriter(new FileWriter(tempFile)); 45 | for (Bookmark data : list) { 46 | if (testset && reader != null) { // means test-set 47 | // TODO: check for resource 48 | if (!Utilities.isEntityEvaluated(reader, data.getUserID(), minBookmarks, maxBookmarks, false)) { 49 | continue; // skip this user if it shoudln't be evaluated 50 | } 51 | } 52 | if (filter != null) { // also for test-set 53 | if (!filter.evaluate(data.getUserID())) { 54 | continue; 55 | } 56 | } 57 | 58 | if (!entries.contains(data.getUserID() + "_" + data.getResourceID())) { 59 | if (tagRec) { 60 | for (int tag : data.getTags()) { 61 | bw.write(data.getUserID() + "\t" + data.getResourceID() + "\t" + tag + "\n"); 62 | } 63 | } else { 64 | String ratingString = ""; 65 | if (data.getRating() != -2) { 66 | ratingString = "\t" + (int)data.getRating(); 67 | } 68 | bw.write(data.getUserID() + "\t" + (reader == null ? 
data.getResourceID() : reader.getResources().get(data.getResourceID())) + ratingString + "\n"); 69 | } 70 | entries.add(data.getUserID() + "_" + data.getResourceID()); 71 | } 72 | } 73 | bw.flush(); 74 | bw.close(); 75 | } catch (IOException e) { 76 | e.printStackTrace(); 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/file/stemming/Among.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This class was automatically generated by a Snowball to Java compiler 3 | * It implements the stemming algorithm defined by a snowball script. 4 | */ 5 | 6 | package file.stemming; 7 | 8 | import java.lang.reflect.Method; 9 | 10 | public class Among { 11 | public Among (String s, int substring_i, int result, 12 | String methodname, SnowballProgram methodobject) { 13 | this.s_size = s.length(); 14 | this.s = s.toCharArray(); 15 | this.substring_i = substring_i; 16 | this.result = result; 17 | this.methodobject = methodobject; 18 | if (methodname.length() == 0) { 19 | this.method = null; 20 | } else { 21 | try { 22 | this.method = methodobject.getClass(). 
23 | getDeclaredMethod(methodname, new Class[0]); 24 | } catch (NoSuchMethodException e) { 25 | throw new RuntimeException(e); 26 | } 27 | } 28 | } 29 | 30 | public final int s_size; /* search string */ 31 | public final char[] s; /* search string */ 32 | public final int substring_i; /* index to longest matching substring */ 33 | public final int result; /* result of the lookup */ 34 | public final Method method; /* method to use if substring matches */ 35 | public final SnowballProgram methodobject; /* object to invoke method on */ 36 | }; 37 | -------------------------------------------------------------------------------- /src/file/stemming/SnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This class was automatically generated by a Snowball to Java compiler 3 | * It implements the stemming algorithm defined by a snowball script. 4 | */ 5 | 6 | package file.stemming; 7 | 8 | import java.lang.reflect.InvocationTargetException; 9 | 10 | public abstract class SnowballStemmer extends SnowballProgram { 11 | public abstract boolean stem(); 12 | }; 13 | -------------------------------------------------------------------------------- /src/itemrecommendations/MPResourceCalculator.java: -------------------------------------------------------------------------------- 1 | package itemrecommendations; 2 | 3 | import java.util.ArrayList; 4 | import java.util.LinkedHashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Timer; 8 | import java.util.TreeMap; 9 | import java.util.concurrent.TimeUnit; 10 | 11 | import com.google.common.base.Stopwatch; 12 | import com.google.common.primitives.Ints; 13 | 14 | import common.Bookmark; 15 | import common.IntMapComparator; 16 | import common.MemoryThread; 17 | import common.PerformanceMeasurement; 18 | import common.Utilities; 19 | import file.BookmarkReader; 20 | import file.PredictionFileWriter; 21 | 22 | public class MPResourceCalculator { 
23 | 24 | private static String timeString; 25 | 26 | private static List getPopularResources(BookmarkReader reader, int count, int trainSize) { 27 | List resources = new ArrayList(); 28 | Map countMap = new LinkedHashMap(); 29 | for (int i = 0; i < reader.getResources().size(); i++) { 30 | countMap.put(i, reader.getResourceCounts().get(i)); 31 | } 32 | Map sortedCountMap = new TreeMap(new IntMapComparator(countMap)); 33 | sortedCountMap.putAll(countMap); 34 | 35 | for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) { 36 | List userResources = Bookmark.getResourcesFromUser(reader.getBookmarks().subList(0, trainSize), userID); 37 | //System.out.println(userResources.size()); 38 | List resIDs = new ArrayList(); 39 | int i = 0; 40 | for (Integer key : sortedCountMap.keySet()) { 41 | if (i < count) { 42 | if (!userResources.contains(key)) { 43 | resIDs.add(key); 44 | i++; 45 | } 46 | } else { 47 | break; 48 | } 49 | } 50 | resources.add(Ints.toArray(resIDs)); 51 | } 52 | return resources; 53 | } 54 | 55 | private static List getRandomResources(BookmarkReader reader, int count, int trainSize) { 56 | List resources = new ArrayList(); 57 | int resCount = reader.getResources().size(); 58 | 59 | for (int userID : reader.getUniqueUserListFromTestSet(trainSize)) { 60 | List userResources = Bookmark.getResourcesFromUser(reader.getBookmarks().subList(0, trainSize), userID); 61 | 62 | List resIDs = new ArrayList(); 63 | int i = 0; 64 | for (Integer res : Utilities.getRandomIndices(0, resCount - 1)) { 65 | if (i < count) { 66 | if (!userResources.contains(res)) { 67 | resIDs.add(res); 68 | i++; 69 | } 70 | } else { 71 | break; 72 | } 73 | } 74 | resources.add(Ints.toArray(resIDs)); 75 | } 76 | return resources; 77 | } 78 | 79 | public static BookmarkReader predictPopularResources(String filename, int trainSize, boolean writeTime) { 80 | Timer timerThread = new Timer(); 81 | MemoryThread memoryThread = new MemoryThread(); 82 | timerThread.schedule(memoryThread, 0, 
MemoryThread.TIME_SPAN); 83 | 84 | BookmarkReader reader = new BookmarkReader(trainSize, false); 85 | reader.readFile(filename); 86 | Stopwatch timer = new Stopwatch(); 87 | timer.start(); 88 | 89 | List values = getPopularResources(reader, 20, trainSize); 90 | 91 | timer.stop(); 92 | long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS); 93 | timer.reset(); 94 | timer.start(); 95 | PredictionFileWriter writer = new PredictionFileWriter(reader, values); 96 | writer.writeResourcePredictionsToFile(filename + "_mp", trainSize, 0); 97 | timer.stop(); 98 | long testTime = timer.elapsed(TimeUnit.MILLISECONDS); 99 | timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, reader.getBookmarks().size() - trainSize); 100 | 101 | timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory()); 102 | timerThread.cancel(); 103 | if (writeTime) { 104 | Utilities.writeStringToFile("./data/metrics/" + filename + "_mp_TIME.txt", timeString); 105 | } 106 | return reader; 107 | } 108 | 109 | public static BookmarkReader predictRandomResources(String filename, int trainSize, boolean writeTime) { 110 | BookmarkReader reader = new BookmarkReader(trainSize, false); 111 | reader.readFile(filename); 112 | 113 | List values = getRandomResources(reader, 20, trainSize); 114 | PredictionFileWriter writer = new PredictionFileWriter(reader, values); 115 | writer.writeResourcePredictionsToFile(filename + "_rand", trainSize, 0); 116 | return reader; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/processing/ContentBasedCalculator.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 
5 | Copyright (C) 2013 Dominik Kowald 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU Affero General Public License as 9 | published by the Free Software Foundation, either version 3 of the 10 | License, or (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Affero General Public License for more details. 16 | 17 | You should have received a copy of the GNU Affero General Public License 18 | along with this program. If not, see . 19 | */ 20 | package processing; 21 | 22 | import java.util.ArrayList; 23 | import java.util.LinkedHashMap; 24 | import java.util.List; 25 | import java.util.Map; 26 | import com.google.common.primitives.Ints; 27 | 28 | import file.PredictionFileWriter; 29 | import file.BookmarkReader; 30 | import common.Bookmark; 31 | 32 | public class ContentBasedCalculator { 33 | 34 | private final static int REC_LIMIT = 10; 35 | 36 | private BookmarkReader reader; 37 | private List trainList; 38 | 39 | public ContentBasedCalculator(BookmarkReader reader, int trainSize) { 40 | this.reader = reader; 41 | 42 | // TODO: use this data for recommendations 43 | this.trainList = this.reader.getBookmarks().subList(0, trainSize); 44 | } 45 | 46 | public Map getRankedTagList(int userID, int resID) { 47 | Map resultMap = new LinkedHashMap(); 48 | 49 | // TODO: calculate your recommendations here and return the top-10 (=REC_LIMIT) tags with probability value 50 | // have also a look on the other calculator classes! 
51 | 52 | // TODO: in order to improve your content-based recommender, you can merge your results with other approaches like the ones from the LanguageModelCalculator or ActCalculator 53 | 54 | return resultMap; 55 | } 56 | 57 | // --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 58 | 59 | public static List> startContentBasedCreation(BookmarkReader reader, int sampleSize) { 60 | int size = reader.getBookmarks().size(); 61 | int trainSize = size - sampleSize; 62 | 63 | ContentBasedCalculator calculator = new ContentBasedCalculator(reader, trainSize); 64 | List> results = new ArrayList>(); 65 | if (trainSize == size) { 66 | trainSize = 0; 67 | } 68 | 69 | for (int i = trainSize; i < size; i++) { // the test-set 70 | Bookmark data = reader.getBookmarks().get(i); 71 | Map map = calculator.getRankedTagList(data.getUserID(), data.getResourceID()); 72 | results.add(map); 73 | } 74 | return results; 75 | } 76 | 77 | public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize) { 78 | BookmarkReader reader = new BookmarkReader(trainSize, false); 79 | reader.readFile(filename); 80 | 81 | List> modelValues = startContentBasedCreation(reader, sampleSize); 82 | 83 | List predictionValues = new ArrayList(); 84 | for (int i = 0; i < modelValues.size(); i++) { 85 | Map modelVal = modelValues.get(i); 86 | predictionValues.add(Ints.toArray(modelVal.keySet())); 87 | } 88 | String suffix = "_cb"; 89 | reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size())); 90 | PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues); 91 | String outputFile = filename + suffix; 92 | writer.writeFile(outputFile); 93 | 94 | return reader; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/processing/FolkRankCalculator.java: 
--------------------------------------------------------------------------------
/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package processing;

import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.Timer;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

import common.Bookmark;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;
import processing.folkrank.*;

/**
 * Runs FolkRank over the training part of a bookmark file and writes, for every
 * test bookmark, the top-ranked tags of both weight vectors delivered by the
 * FolkRank result ({@code getWeights()} and {@code getAPRWeights()}).
 */
public class FolkRankCalculator {

    /** Number of tags kept per test bookmark. */
    private static final int TOP_K = 10;

    private static List<int[]> frResults;
    private static List<int[]> prResults;
    private static String timeString;

    /**
     * Copies the item ids of {@code items} into an array, keeping at most
     * {@code limit} entries in iteration order.
     *
     * The array is trimmed to the number of items actually present, so that a
     * short result is not padded with zeros (0 would otherwise be written out
     * as if it were a recommended tag id).
     */
    private static int[] toItemArray(SortedSet<ItemWithWeight> items, int limit) {
        int[] ids = new int[limit];
        int count = 0;
        for (ItemWithWeight item : items) {
            if (count == limit) {
                break;
            }
            ids[count++] = item.getItem();
        }
        return count == limit ? ids : java.util.Arrays.copyOf(ids, count);
    }

    /**
     * Builds the FolkRank data from the first {@code size - sampleSize} bookmarks
     * and computes one FolkRank run per remaining (test) bookmark. Results are
     * stored in {@link #frResults} and {@link #prResults}; the measured times are
     * appended to {@link #timeString}.
     *
     * @param reader     reader whose bookmarks have already been loaded
     * @param sampleSize number of bookmarks at the end of the list that form the test set
     */
    private static void startFolkRankCreation(BookmarkReader reader, int sampleSize) {
        System.out.println("\nStart FolkRank Calculation for Tags");
        frResults = new ArrayList<int[]>();
        prResults = new ArrayList<int[]>();
        List<Bookmark> bookmarks = reader.getBookmarks();
        int size = bookmarks.size();
        int trainSize = size - sampleSize;

        // "training" time = building the fact data and the algorithm object
        Stopwatch timer = new Stopwatch();
        timer.start();
        FactReader factReader = new WikipediaFactReader(reader, trainSize, 3);
        FactPreprocessor prep = new FactReaderFactPreprocessor(factReader);
        prep.process();
        FolkRankData facts = prep.getFolkRankData();

        FolkRankParam param = new FolkRankParam();
        FolkRankPref pref = new FolkRankPref(new double[] {1.0, 1.0, 1.0});
        int usrCounts = facts.getCounts()[1].length;
        System.out.println("Users: " + usrCounts);
        int resCounts = facts.getCounts()[2].length;
        System.out.println("Resources: " + resCounts);
        double[][] prefWeights = new double[][] {new double[] {}, new double[] {usrCounts}, new double[] {resCounts}};
        FolkRankAlgorithm folk = new FolkRankAlgorithm(param);
        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timer.reset();
        // start FolkRank; the stopwatch accumulates over all iterations
        for (int i = trainSize; i < size; i++) {
            timer.start();
            Bookmark data = bookmarks.get(i);
            // users/resources that are not part of the fact data get no preference entry
            int u = data.getUserID();
            int[] uPrefs = (u < usrCounts ? new int[] {u} : new int[] {});
            int r = data.getResourceID();
            int[] rPrefs = (r < resCounts ? new int[] {r} : new int[] {});
            pref.setPreference(new int[][] {new int[] {}, uPrefs, rPrefs}, prefWeights);
            FolkRankResult result = folk.computeFolkRank(facts, pref);

            SortedSet<ItemWithWeight> topKTags = ItemWithWeight.getTopK(facts, result.getWeights(), TOP_K, 0);
            frResults.add(toItemArray(topKTags, TOP_K));
            timer.stop();

            // the APR list is extracted outside of the timed section, as before
            SortedSet<ItemWithWeight> topKTagsPr = ItemWithWeight.getTopK(facts, result.getAPRWeights(), TOP_K, 0);
            prResults.add(toItemArray(topKTagsPr, TOP_K));
        }
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize);
    }

    /**
     * Reads {@code filename}, runs FolkRank for the last {@code sampleSize}
     * bookmarks and writes the predictions to {@code filename + "_fr"} and
     * {@code filename + "_apr"}, plus the time/memory measurements to
     * {@code ./data/metrics/<filename>_fr_TIME.txt}.
     *
     * @param filename   name of the bookmark file handed to the reader and used as output prefix
     * @param trainSize  number of leading bookmarks treated as training data by the reader
     * @param sampleSize number of trailing bookmarks to predict
     * @return the reader that was used, with its test lines set
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize) {
        Timer timerThread = new Timer();
        MemoryThread memoryThread = new MemoryThread();
        timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);
        try {
            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);
            startFolkRankCreation(reader, sampleSize);

            reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));
            PredictionFileWriter writer = new PredictionFileWriter(reader, frResults);
            writer.writeFile(filename + "_fr");
            PredictionFileWriter prWriter = new PredictionFileWriter(reader, prResults);
            prWriter.writeFile(filename + "_apr");

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory());
            Utilities.writeStringToFile("./data/metrics/" + filename + "_fr" + "_TIME.txt", timeString);
            return reader;
        } finally {
            // always stop the sampling timer, also when reading or writing fails,
            // so its thread cannot outlive this call
            timerThread.cancel();
        }
    }
}

--------------------------------------------------------------------------------
/src/processing/MPCalculator.java:
--------------------------------------------------------------------------------
/*
 TagRecommender:
 A framework to implement and evaluate algorithms for the recommendation
 of tags.
 Copyright (C) 2013 Dominik Kowald

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package processing;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;

import common.IntMapComparator;
import common.Bookmark;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;
import engine.EngineUtils;
import file.PredictionFileWriter;
import file.BookmarkReader;

/**
 * Most-popular tag baseline: recommends the globally most frequent tags
 * (optionally without the tags the user already used), or - as an upper
 * bound - the tags that were really assigned to the test bookmark.
 */
public class MPCalculator {

    private static String timeString;

    /**
     * Returns, for each of the last {@code sampleSize} bookmarks, exactly the
     * tags stored in that bookmark.
     *
     * @param limit currently unused; the lists are neither cut nor padded
     */
    private static List<int[]> getPerfectTags(BookmarkReader reader, int sampleSize, int limit) {
        List<int[]> tags = new ArrayList<int[]>();
        int trainSize = reader.getBookmarks().size() - sampleSize;

        for (Bookmark data : reader.getBookmarks().subList(trainSize, trainSize + sampleSize)) {
            List<Integer> t = new ArrayList<Integer>(data.getTags());
            tags.add(Ints.toArray(t));
        }
        return tags;
    }

    /**
     * Returns up to {@code size} tag ids in the order produced by a
     * {@link TreeMap} that uses {@link IntMapComparator} over the reader's tag
     * counts.
     *
     * The result is as long as the number of tags actually available, i.e. it
     * is no longer zero-padded when fewer than {@code size} tags exist.
     */
    private static int[] getPopularTagList(BookmarkReader reader, int size) {
        Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>();
        for (int i = 0; i < reader.getTagCounts().size(); i++) {
            countMap.put(i, reader.getTagCounts().get(i));
        }
        Map<Integer, Integer> sortedCountMap = new TreeMap<Integer, Integer>(new IntMapComparator(countMap));
        sortedCountMap.putAll(countMap);

        int[] tagIDs = new int[Math.min(size, sortedCountMap.size())];
        int i = 0;
        for (Integer key : sortedCountMap.keySet()) {
            if (i >= tagIDs.length) {
                break;
            }
            tagIDs[i++] = key;
        }
        return tagIDs;
    }

    /**
     * Returns the same most-popular list once per test bookmark and records the
     * time needed for both steps in {@link #timeString}.
     */
    private static List<int[]> getPopularTags(BookmarkReader reader, int sampleSize, int limit) {
        List<int[]> tags = new ArrayList<int[]>();
        Stopwatch timer = new Stopwatch();
        timer.start();

        int[] tagIDs = getPopularTagList(reader, limit);

        timer.stop();
        long trainingTime = timer.elapsed(TimeUnit.MILLISECONDS);
        timer.reset();
        timer.start();
        // every test bookmark shares the one array instance
        for (int j = 0; j < sampleSize; j++) {
            tags.add(tagIDs);
        }
        timer.stop();
        long testTime = timer.elapsed(TimeUnit.MILLISECONDS);

        timeString = PerformanceMeasurement.addTimeMeasurement(timeString, true, trainingTime, testTime, sampleSize);
        return tags;
    }

    /**
     * Like {@link #getPopularTags}, but skips, per test bookmark, every tag that
     * is a key in the map {@code Utilities.getUserMaps} delivers for the
     * bookmark's user (built from the first {@code trainSize} bookmarks).
     *
     * A user for whom no map exists (index beyond the list, or a null entry)
     * is treated as having nothing to filter, mirroring the bounds check in
     * {@code RecencyCalculator.getRankedTagList}.
     */
    private static List<int[]> getPopularTagsFiltered(BookmarkReader reader, int trainSize, int sampleSize, int limit) {
        System.out.println("MP: Filter own entities");

        List<int[]> tags = new ArrayList<int[]>();
        int[] tagIDs = getPopularTagList(reader, reader.getTags().size());
        List<Bookmark> bookmarks = reader.getBookmarks();
        // only the key set of the per-user maps is needed here
        List<? extends Map<Integer, ?>> userMaps = Utilities.getUserMaps(bookmarks.subList(0, trainSize));

        for (int j = trainSize; j < trainSize + sampleSize; j++) {
            int userID = bookmarks.get(j).getUserID();
            Map<Integer, ?> filterTags = (userID >= 0 && userID < userMaps.size()) ? userMaps.get(userID) : null;
            List<Integer> returnTags = new ArrayList<Integer>();
            for (int popTag : tagIDs) {
                if (returnTags.size() >= limit) {
                    break;
                }
                if (filterTags == null || !filterTags.containsKey(popTag)) {
                    returnTags.add(popTag);
                }
            }
            tags.add(Ints.toArray(returnTags));
        }

        return tags;
    }

    // public statics --------------------------------------------------------------------------------------------
    /**
     * Reads {@code filename}, creates one tag list per test bookmark and writes
     * them to {@code filename + "_mp"}; the measurements go to
     * {@code ./data/metrics/<filename>_mp_TIME.txt}.
     *
     * @param mp {@code true} for the most-popular lists (filtered if
     *           {@code Utilities.FILTER_OWN} is set), {@code false} for the
     *           bookmark's own tags
     * @return the reader that was used, with its test lines set
     */
    public static BookmarkReader predictPopularTags(String filename, int trainSize, int sampleSize, boolean mp) {
        Timer timerThread = new Timer();
        MemoryThread memoryThread = new MemoryThread();
        timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);
        try {
            BookmarkReader reader = new BookmarkReader(trainSize, false);
            reader.readFile(filename);

            List<int[]> values;
            if (!mp) {
                values = getPerfectTags(reader, sampleSize, Utilities.REC_LIMIT);
            } else if (Utilities.FILTER_OWN) {
                values = getPopularTagsFiltered(reader, trainSize, sampleSize, Utilities.REC_LIMIT);
            } else {
                values = getPopularTags(reader, sampleSize, Utilities.REC_LIMIT);
            }

            reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));
            PredictionFileWriter writer = new PredictionFileWriter(reader, values);
            writer.writeFile(filename + "_mp");

            timeString = PerformanceMeasurement.addMemoryMeasurement(timeString, false, memoryThread.getMaxMemory());
            Utilities.writeStringToFile("./data/metrics/" + filename + "_mp" + "_TIME.txt", timeString);
            return reader;
        } finally {
            // always stop the sampling timer, also when reading or writing fails
            timerThread.cancel();
        }
    }
}

--------------------------------------------------------------------------------
/src/processing/RecencyCalculator.java:
--------------------------------------------------------------------------------
package processing;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;
import com.google.common.primitives.Ints;

import common.DoubleMapComparator;
import common.Bookmark;
import common.MemoryThread;
import common.PerformanceMeasurement;
import common.Utilities;
import file.PredictionFileWriter;
import file.BookmarkReader;

/**
 * Recommends the tags a user applied last: the user's training bookmarks are
 * sorted and walked from the end until {@code Utilities.REC_LIMIT} distinct
 * tags have been collected.
 */
public class RecencyCalculator {

    /** Training bookmarks as returned by {@code Utilities.getBookmarks}, indexed by user id. */
    private List<List<Bookmark>> userBookmarks;

    /**
     * @param reader    reader whose bookmarks have already been loaded
     * @param trainSize number of leading bookmarks that form the training set
     */
    public RecencyCalculator(BookmarkReader reader, int trainSize) {
        List<Bookmark> trainList = reader.getBookmarks().subList(0, trainSize);
        this.userBookmarks = Utilities.getBookmarks(trainList, false);
    }

    /**
     * Collects up to {@code Utilities.REC_LIMIT} distinct tags of the user,
     * starting with the last bookmark in sort order.
     *
     * @param userID id of the user
     * @return insertion-ordered map from tag id to score; the first tag gets
     *         {@code REC_LIMIT}, each further one a score lower by 1. Empty if the
     *         user id lies beyond the training data.
     */
    public Map<Integer, Double> getRankedTagList(int userID) {
        Map<Integer, Double> returnMap = new LinkedHashMap<Integer, Double>();
        if (userID >= this.userBookmarks.size()) {
            return returnMap;
        }
        // note: sorts the stored list in place (order defined by Bookmark's natural ordering)
        List<Bookmark> bookmarks = this.userBookmarks.get(userID);
        Collections.sort(bookmarks);

        int count = 0;
        int index = bookmarks.size() - 1;
        while (count < Utilities.REC_LIMIT && index >= 0) {
            Bookmark b = bookmarks.get(index--);
            for (int t : b.getTags()) {
                if (!returnMap.containsKey(t)) {
                    returnMap.put(t, (double)Utilities.REC_LIMIT - count++);
                    if (count >= Utilities.REC_LIMIT) {
                        break;
                    }
                }
            }
        }
        return returnMap;
    }

    // Statics ----------------------------------------------------------------------------------------------------------------------
    /**
     * Reads {@code filename}, computes the recency-based tag list for every test
     * bookmark (once per user, then reused) and writes the predictions to
     * {@code filename + "_rec"}.
     *
     * @return the reader that was used, with its test lines set
     */
    public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize) {
        BookmarkReader reader = new BookmarkReader(trainSize, false);
        reader.readFile(filename);
        List<int[]> predictionValues = new ArrayList<int[]>();
        // per-user cache: the list depends on the user only, not on the resource
        Map<Integer, int[]> predictionValueMap = new LinkedHashMap<Integer, int[]>();
        RecencyCalculator calculator = new RecencyCalculator(reader, trainSize);

        for (int i = trainSize; i < trainSize + sampleSize; i++) { // the test-set
            Bookmark data = reader.getBookmarks().get(i);
            int[] predictionKeys = predictionValueMap.get(data.getUserID());
            if (predictionKeys == null) {
                Map<Integer, Double> map = calculator.getRankedTagList(data.getUserID());
                predictionKeys = Ints.toArray(map.keySet());
                predictionValueMap.put(data.getUserID(), predictionKeys);
            }
            predictionValues.add(predictionKeys);
        }

        reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));
        PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues);
        writer.writeFile(filename + "_rec");
        return reader;
    }
}

--------------------------------------------------------------------------------
/src/processing/analyzing/ReuseProbValue.java:
-------------------------------------------------------------------------------- 1 | package processing.analyzing; 2 | 3 | public class ReuseProbValue { 4 | 5 | private double sum; 6 | private double count; 7 | 8 | public ReuseProbValue() { 9 | this.sum = 0.0; 10 | this.count = 0.0; 11 | } 12 | 13 | public void increment(double value) { 14 | this.sum += value; 15 | this.count++; 16 | } 17 | 18 | public double getCount() { 19 | return this.count; 20 | } 21 | 22 | public double getSum() { 23 | return this.sum; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/processing/analyzing/UserTagDistribution.java: -------------------------------------------------------------------------------- 1 | package processing.analyzing; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.util.ArrayList; 7 | import java.util.Collections; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Set; 11 | 12 | import common.Bookmark; 13 | import common.Utilities; 14 | import file.BookmarkReader; 15 | 16 | public class UserTagDistribution { 17 | 18 | public static void calculate(BookmarkReader reader, String dataset) { 19 | List userSizes = new ArrayList(); 20 | List> userBookmarks = Utilities.getBookmarks(reader.getBookmarks(), false); 21 | double reuseRatio = 0.0; 22 | for (List userB : userBookmarks) { 23 | Set userResources = new HashSet(); 24 | double reuseCount = 0.0; 25 | for (Bookmark b : userB) { 26 | if (userResources.contains(b.getResourceID())) { 27 | reuseCount++; 28 | } else { 29 | userResources.add(b.getResourceID()); 30 | } 31 | } 32 | userSizes.add(userB.size()); 33 | reuseRatio += (reuseCount / userB.size()); 34 | } 35 | Collections.sort(userSizes, Collections.reverseOrder()); 36 | System.out.println("Resource reuse ratio: " + reuseRatio / userBookmarks.size()); 37 | 38 | try { 39 | FileWriter userWriter = new FileWriter(new 
File("./data/csv/" + dataset + "_userDist.txt")); 40 | BufferedWriter userBW = new BufferedWriter(userWriter); 41 | for (int size : userSizes) { 42 | userBW.write(size + "\n"); 43 | } 44 | userBW.close(); 45 | } catch(Exception e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/processing/analyzing/UserTagProperties.java: -------------------------------------------------------------------------------- 1 | package processing.analyzing; 2 | 3 | import java.util.LinkedHashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import common.Bookmark; 8 | import common.CooccurenceMatrix; 9 | 10 | public class UserTagProperties { 11 | 12 | private final static double TIME_CONST = 1.0; // for seconds 13 | //private final static double TIME_CONST = 3600.0; // for hours 14 | //private final static double TIME_CONST = 86400.0; // for days 15 | 16 | private Map tagCounts = null; 17 | private Map tagRecencies = null; 18 | private Map tagContextSim = null; 19 | private Map tagReuseProb = null; 20 | 21 | public UserTagProperties(List userBookmarks, Bookmark testBookmark, CooccurenceMatrix tagMatrix) { 22 | this.tagCounts = new LinkedHashMap(); 23 | this.tagRecencies = new LinkedHashMap(); 24 | this.tagContextSim = new LinkedHashMap(); 25 | this.tagReuseProb = new LinkedHashMap(); 26 | 27 | for (Bookmark bookmark : userBookmarks) { 28 | int currentRecency = (int) Math.round((Long.parseLong(testBookmark.getTimestamp()) - Long.parseLong(bookmark.getTimestamp())) / TIME_CONST); 29 | for (int tag : bookmark.getTags()) { 30 | Integer count = this.tagCounts.get(tag); 31 | Integer recency = this.tagRecencies.get(tag); 32 | Integer contextSim = this.tagContextSim.get(tag); 33 | Integer reuseProb = this.tagReuseProb.get(tag); 34 | 35 | if (count == null) { 36 | this.tagCounts.put(tag, 1); 37 | } else { 38 | this.tagCounts.put(tag, count.intValue() + 1); 39 | } 40 | if (recency == null) { 
41 | this.tagRecencies.put(tag, currentRecency); 42 | } else { 43 | if (currentRecency < recency) { 44 | this.tagRecencies.put(tag, currentRecency); 45 | } 46 | } 47 | if (tagMatrix != null) { 48 | if (contextSim == null) { 49 | this.tagContextSim.put(tag, tagMatrix.getCoocurenceCount(tag, testBookmark.getTags())); 50 | } 51 | } 52 | if (reuseProb == null) { 53 | this.tagReuseProb.put(tag, testBookmark.getTags().contains(tag) ? 1 : 0); 54 | } 55 | } 56 | } 57 | } 58 | 59 | public Map getTagCounts() { 60 | return this.tagCounts; 61 | } 62 | 63 | public Map getTagRecencies() { 64 | return this.tagRecencies; 65 | } 66 | 67 | public Map getTagContextSim() { 68 | return this.tagContextSim; 69 | } 70 | 71 | public Map getReuseProb() { 72 | return this.tagReuseProb; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/processing/folkrank/APRFolkRankResult.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | import java.util.Arrays; 36 | 37 | public class APRFolkRankResult extends StandardFolkRankResult implements FolkRankResult { 38 | 39 | private double[][] weights; 40 | 41 | public double[][] getAPRWeights() { 42 | return weights; 43 | } 44 | public void setAPRWeights(double[][] weights) { 45 | // copy weights, because array is overwritten 46 | this.weights = new double[weights.length][]; 47 | for (int dim = 0; dim < weights.length; dim++) { 48 | this.weights[dim] = weights[dim].clone(); 49 | } 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/processing/folkrank/FactPreprocessor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 
5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | /** A FactPreprocessor reads facts from a source (for example a FactReader) and 36 | * creates a FolkRankData object. 
37 | * 38 | * Since during the FolkRank computation all data is represented in integers 39 | * (and there exists only the mapping back to strings in memory) there must be 40 | * an option to set preference for items given by their strings. Therefore the 41 | * interface demands the method setPrefItems which accepts items as strings and 42 | * the method getPrefItems which returns them in integer representation. 43 | * 44 | * @author rja 45 | */ 46 | public interface FactPreprocessor { 47 | 48 | /* 49 | * NOTE: the methods here are given in the order they should typically be 50 | * called. Although some preprocessors might allow different call order. 51 | */ 52 | 53 | 54 | /** Gives the preprocessor items for each dimension which the preprocessor 55 | * shall map to their integer representation during processing. The 56 | * resulting integers can be accessed with getPrefItems(). 57 | * 58 | * @param prefItems - an array of strings for each dimension. Each string 59 | * represents an item occuring in the facts and which should be mapped to 60 | * an integer in order to give it preference. 61 | */ 62 | public void setPrefItems (String [][] prefItems); 63 | 64 | /** Calling this method starts the preprocessing which includes reading the 65 | * facts, mapping them into an array of integers, saving the integer to 66 | * string mappings and saving the integers for the prefItems. 67 | */ 68 | public void process (); 69 | 70 | /** Returns the complete input data neccessary for the FolkRank computation. 71 | * 72 | * @return A FolkRankData object which contains the data for the 73 | * computation. 74 | */ 75 | public FolkRankData getFolkRankData (); 76 | 77 | /** Returns an integer array where the numbers in each dimension represent 78 | * the items given with setPrefItems(). This array can be used to set the 79 | * preference items in {@link FolkRankParam.setPreference}. 
80 | * @return - for each dimension an array of integers representing the items 81 | * given as strings with setPrefItems. 82 | */ 83 | public int[][] getPrefItems (); 84 | } 85 | -------------------------------------------------------------------------------- /src/processing/folkrank/FactReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | 36 | /** A FactReader returns every single fact as an array of strings (for each 37 | * dimension one string). By reading the facts until hasNext() == false one 38 | * can gather all facts of a dataset. 39 | * 40 | * @author rja 41 | */ 42 | public interface FactReader { 43 | 44 | /** Returns the next fact as an array of strings - one string for each 45 | * dimension. 46 | * 47 | * @return An Array of strings, one string for each dimension. 48 | * @throws FactReadingException 49 | */ 50 | public String[] getFact() throws FactReadingException; 51 | /** Tests if there are more facts available to read. 52 | * 53 | * @return true if there are more facts to read. 54 | * @throws FactReadingException 55 | */ 56 | public boolean hasNext() throws FactReadingException; 57 | /** Resets the fact reader so that the facts can be read again. 58 | * Note: some implementations may not return the same facts 59 | * after calling reset() since the facts may have changed since the last 60 | * reading. 61 | * 62 | * @throws FactReadingException 63 | */ 64 | public void reset() throws FactReadingException; 65 | /** Returns the number of dimensions of the fact source. 66 | * @return An integer determining the number of dimensions. 67 | * @throws FactReadingException 68 | */ 69 | public int getNoOfDimensions() throws FactReadingException; 70 | /** Close the reader. 
One a reader has been closed, reset(), hasNext() or 71 | * getFact() operations will throw a FactReadingException. 72 | * 73 | * @throws FactReadingException 74 | */ 75 | public void close () throws FactReadingException; 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/processing/folkrank/FactReadingException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/**
 * Checked exception thrown by FactReaders when an error occurs during
 * initialization or reading.
 *
 * @author rja
 */
public class FactReadingException extends Exception {

    /** Serialization id; last time computed: 2007-02-01. */
    private static final long serialVersionUID = -3668806000537549237L;

    /**
     * Creates an exception carrying only a message.
     *
     * @param message description of the failure.
     */
    public FactReadingException(String message) {
        super(message);
    }

    /**
     * Creates an exception wrapping an existing exception.
     *
     * @param cause the exception that triggered this one.
     */
    public FactReadingException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception carrying both a message and the exception that
     * triggered it.
     *
     * @param message description of the failure.
     * @param cause   the exception that triggered this one.
     */
    public FactReadingException(String message, Throwable cause) {
        super(message, cause);
    }

}
/**
 * Holds the preference weights for the FolkRank algorithm.
 *
 * <p>All arrays are stored and returned by reference; no copies are made.
 *
 * @author rja
 */
public class FolkRankPref {

    /** For each dimension the weight every item gets. */
    private final double[] basePrefWeight;

    /** For each dimension the items which get extra preference weight. */
    private int[][] prefItems;

    /** For each dimension the preference weights given to the items in prefItems. */
    private double[][] prefValues;

    /**
     * Creates a preference object with the given base weights and, initially,
     * no extra-preference items.
     *
     * @param basePrefWeight for each dimension the weight each item gets.
     */
    public FolkRankPref(double[] basePrefWeight) {
        this.basePrefWeight = basePrefWeight;
    }

    /**
     * Sets the items which get extra preference together with the
     * corresponding preference values.
     *
     * @param prefItems  for each dimension an array of items which get extra
     *                   preference.
     * @param prefValues for each dimension an array of preference values which
     *                   the corresponding item from prefItems gets.
     */
    public void setPreference(int[][] prefItems, double[][] prefValues) {
        this.prefValues = prefValues;
        this.prefItems = prefItems;
    }

    /**
     * Returns the base preference weight of every dimension.
     *
     * @return an array of preference weights, one per dimension.
     */
    public double[] getBasePrefWeight() {
        return basePrefWeight;
    }

    /**
     * Returns, for each dimension, the items which should get extra
     * preference weight.
     *
     * @return the extra-preference items, or {@code null} if none were set.
     */
    public int[][] getPrefItems() {
        return prefItems;
    }

    /**
     * Returns, for each dimension, the values the items in prefItems get as
     * extra preference.
     *
     * @return the preference values, or {@code null} if none were set.
     */
    public double[][] getPrefValues() {
        return prefValues;
    }
}
/**
 * Result of a FolkRank computation: the weight vectors, the weight vectors of
 * the adapted PageRank, and the error value of every iteration.
 */
public interface FolkRankResult {

    /**
     * Returns the weight vectors for each dimension.
     *
     * @return an array of weight vectors - one vector for each dimension.
     */
    double[][] getWeights();

    /**
     * Sets the weight vectors.
     *
     * @param weights an array of weight vectors - one for each dimension.
     */
    void setWeights(double[][] weights);

    /**
     * Returns the errors of the computation, one error value per iteration.
     *
     * @return a list of double values, one value for each iteration.
     */
    LinkedList<Double> getErrors();

    /**
     * Adds an error value to the list of error values.
     *
     * @param error an error value.
     */
    void addError(double error);

    /**
     * Returns the weight vectors of the adapted PageRank for each dimension.
     *
     * @return an array of weight vectors - one vector for each dimension.
     */
    double[][] getAPRWeights();

    /**
     * Sets the weight vectors for the adapted PageRank.
     *
     * @param weights an array of weight vectors - one for each dimension.
     */
    void setAPRWeights(double[][] weights);

}
31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | import java.util.SortedSet; 36 | import java.util.TreeSet; 37 | 38 | /** Internal class which is used to represent the top-k items together with 39 | * their weight in a set when finding them. 40 | * 41 | * @author rja 42 | */ 43 | public class ItemWithWeight implements Comparable { 44 | /** 45 | * An int which represents the item. 46 | */ 47 | public int item; 48 | /** 49 | * The weight of the item. 50 | */ 51 | public double weight; 52 | /** The only available constructor. 53 | * 54 | * @param item - a string which represents the item. 55 | * @param weight - the weight of the item. 56 | */ 57 | public ItemWithWeight(int item, double weight) { 58 | super(); 59 | this.item = item; 60 | this.weight = weight; 61 | } 62 | /** 63 | * Disabled default constructor. 64 | */ 65 | private ItemWithWeight () { 66 | /* 67 | * do nothing, since this constructor is not useable. 68 | */ 69 | } 70 | 71 | /** Returns the item. 72 | * @return - the item. 73 | */ 74 | public int getItem() { 75 | return item; 76 | } 77 | /** Returns the weight of the item. 78 | * @return - the weight of the item. 79 | */ 80 | public double getWeight() { 81 | return weight; 82 | } 83 | 84 | /** 85 | * @see java.lang.Object#equals(java.lang.Object) 86 | */ 87 | @Override 88 | public boolean equals(Object obj) { 89 | if (! (obj instanceof ItemWithWeight)) { 90 | return false; 91 | } 92 | return equals((ItemWithWeight) obj); 93 | } 94 | 95 | /** Two items are equal if there string representations are equal. This 96 | * is true, since only items of the same dimension should be compared. 97 | * 98 | * @param other - the item to compare with this item. 99 | * @return - true if this.item == other.item. 100 | */ 101 | private boolean equals (ItemWithWeight other) { 102 | return this.item == other.item; 103 | } 104 | 105 | /** Compares two items by their weight. 106 | * @param o - the other item to compare with this item. 
107 | * @return - 0 if they're equal, -1/+1 otherwise. 108 | * 109 | * @see java.lang.Comparable#compareTo(java.lang.Object) 110 | */ 111 | public int compareTo(ItemWithWeight o) { 112 | if (o == null) { throw new NullPointerException(); } 113 | int sgn = (int) Math.signum(o.weight - this.weight); 114 | if (sgn != 0) { 115 | return sgn; 116 | } else { 117 | return o.item - this.item; 118 | } 119 | } 120 | /** 121 | * @see java.lang.Object#hashCode() 122 | */ 123 | public int hashCode () { 124 | return item; 125 | } 126 | 127 | public static SortedSet getTopK (FolkRankData facts, double[][] weights, int k, int dim) { 128 | double minWeight; 129 | SortedSet set = new TreeSet(); 130 | 131 | minWeight = -100; // consider only items with positive weight 132 | for (int item = 0; item < weights[dim].length; item++) { 133 | double currWeight = weights[dim][item]; 134 | if (currWeight > minWeight) { 135 | /* new weight to consider found */ 136 | set.add(new ItemWithWeight(item, currWeight)); 137 | if (set.size() > k) { 138 | // new best weight, since we have more than k items in set 139 | ItemWithWeight last = set.last(); 140 | set.remove(last); 141 | minWeight = set.last().weight; 142 | } 143 | } 144 | } 145 | 146 | return set; 147 | } 148 | } -------------------------------------------------------------------------------- /src/processing/folkrank/LeavePostOutFolkRankDataDuplicator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 
11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | 36 | public class LeavePostOutFolkRankDataDuplicator { 37 | 38 | public static final int U = 1; 39 | public static final int R = 2; 40 | 41 | 42 | /** Precondition: leaving out u and r does NOT cause u or r to be completely 43 | * removed (i.e. 
there exists at least one TAS with u and one TAS with r) 44 | * @param in 45 | * @param user 46 | * @param resource 47 | * @return 48 | */ 49 | public FolkRankData getDuplicate (FolkRankData in, int user, int resource) { 50 | /* 51 | * count number of tas for this user/post combination 52 | */ 53 | int[][] facts = in.getFacts(); 54 | int tasCtr = 0; 55 | for (int fact[]: facts) { 56 | if (fact[U] == user && fact[R] == resource) tasCtr++; 57 | } 58 | /* 59 | * calculate number of items per dimension 60 | */ 61 | int[][] counts = in.getCounts(); 62 | int[] noOfItemsPerDimension = new int[counts.length]; 63 | for (int dim = 0; dim < counts.length; dim++) { 64 | noOfItemsPerDimension[dim] = counts[dim].length; 65 | } 66 | FolkRankData out = new FolkRankData(facts.length - tasCtr, noOfItemsPerDimension); 67 | 68 | /* 69 | * copy fact 70 | */ 71 | int factId = 0; 72 | for (int fact[]: facts) { 73 | if (fact[U] == user && fact[R] == resource) continue; 74 | out.setFact(factId, fact); 75 | factId++; 76 | } 77 | 78 | return out; 79 | } 80 | 81 | public FolkRankData getDuplicate (FolkRankData in, int user) { 82 | /* 83 | * count number of tas for this user/post combination 84 | */ 85 | int[][] facts = in.getFacts(); 86 | int tasCtr = 0; 87 | for (int fact[]: facts) { 88 | if (fact[U] == user) tasCtr++; 89 | } 90 | /* 91 | * calculate number of items per dimension 92 | */ 93 | int[][] counts = in.getCounts(); 94 | int[] noOfItemsPerDimension = new int[counts.length]; 95 | for (int dim = 0; dim < counts.length; dim++) { 96 | noOfItemsPerDimension[dim] = counts[dim].length; 97 | } 98 | FolkRankData out = new FolkRankData(facts.length - tasCtr, noOfItemsPerDimension); 99 | 100 | /* 101 | * copy fact 102 | */ 103 | int factId = 0; 104 | for (int fact[]: facts) { 105 | if (fact[U] == user) continue; 106 | out.setFact(factId, fact); 107 | factId++; 108 | } 109 | 110 | return out; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- 
/src/processing/folkrank/SeparatedWeightInitializationStrategy.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | import java.util.Arrays; 36 | 37 | /** This strategy spreads over every dimension one unit of weight. This means 38 | * that the (initial) weights sum up to one for every dimension (as do the 39 | * prefWeights, when the preference is included). 40 | * 41 | * @author rja 42 | */ 43 | public class SeparatedWeightInitializationStrategy implements WeightInitializationStrategy { 44 | 45 | /** 46 | * @see org.semanticdesktop.nepomuk.comp.folkpeer.folkrank.strategy.WeightInitializationStrategy#initalizeWeights(org.semanticdesktop.nepomuk.comp.folkpeer.folkrank.data.FolkRankParam, double[][], double[], double[]) 47 | */ 48 | public void initalizeWeights(FolkRankPref pref, double[][] weights, double[] prefWeights, double[] prefWeightsNormFactors) { 49 | /* 50 | * check input arguments 51 | */ 52 | if (weights.length != prefWeights.length || weights.length != prefWeightsNormFactors.length) 53 | throw new IllegalArgumentException("Sizes of first dimension of input parameters do not match."); 54 | 55 | double[] basePrefWeight = pref.getBasePrefWeight(); 56 | double[][] prefValues = pref.getPrefValues(); 57 | 58 | for (int dim = 0; dim < weights.length; dim++) { 59 | /* 60 | * Initialize weights: 61 | * every item gets the reciprocal of the number of items in the 62 | * dimensions it belongs to. 63 | */ 64 | Arrays.fill(weights[dim], 1.0 / weights[dim].length); 65 | 66 | /* 67 | * Calculate the sum of the preference weights. 
68 | */ 69 | prefWeightsNormFactors[dim] = 0.0; 70 | if (prefValues != null) { 71 | for (double prefValue:prefValues[dim]) { 72 | prefWeightsNormFactors[dim] += prefValue; 73 | } 74 | } 75 | 76 | /* 77 | * Calculate, how many additional weight is spread by the preference 78 | */ 79 | prefWeightsNormFactors[dim] = basePrefWeight[dim] * weights[dim].length + prefWeightsNormFactors[dim]; 80 | 81 | /* 82 | * initialize the preference weights each item gets 83 | */ 84 | if (basePrefWeight[dim] == 0.0) { 85 | /* 86 | * prevent 0.0 / 0.0 = NaN 87 | */ 88 | prefWeights[dim] = 0.0; 89 | } else { 90 | prefWeights[dim] = basePrefWeight[dim] / prefWeightsNormFactors[dim]; 91 | } 92 | } 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/processing/folkrank/StandardFolkRankResult.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | import java.util.LinkedList; 36 | 37 | /** This class stores the result of the FolkRank computation, that is basically 38 | * a weight vector for each dimension. 
39 | * 40 | * @author rja 41 | */ 42 | public class StandardFolkRankResult implements FolkRankResult { 43 | 44 | private double[][] weights; 45 | private LinkedList errors = new LinkedList(); 46 | 47 | /** 48 | * @see org.semanticdesktop.nepomuk.comp.folkpeer.folkrank.data.FolkRankResult#getWeights() 49 | */ 50 | public double[][] getWeights() { 51 | return weights; 52 | } 53 | /** 54 | * @see org.semanticdesktop.nepomuk.comp.folkpeer.folkrank.data.FolkRankResult#setWeights(double[][]) 55 | */ 56 | public void setWeights(double[][] weights) { 57 | this.weights = weights; 58 | } 59 | 60 | /** 61 | * @see org.semanticdesktop.nepomuk.comp.folkpeer.folkrank.data.FolkRankResult#getErrors() 62 | */ 63 | public LinkedList getErrors() { 64 | return errors; 65 | } 66 | /** 67 | * @see org.semanticdesktop.nepomuk.comp.folkpeer.folkrank.data.FolkRankResult#addError(double) 68 | */ 69 | public void addError(double error) { 70 | this.errors.add(error); 71 | } 72 | 73 | /* 74 | * not supported by this implementation 75 | */ 76 | public double[][] getAPRWeights() { 77 | return null; 78 | } 79 | public void setAPRWeights(double[][] weights) { 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/processing/folkrank/WeightInitializationStrategy.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2006-2009, NEPOMUK Consortium 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 
11 | * 12 | * * Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the NEPOMUK Consortium nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **/ 33 | package processing.folkrank; 34 | 35 | /** A weight initialization strategy is used to initialize weight and preference 36 | * vectors in a FolkRank computation. 37 | * 38 | * @author rja 39 | */ 40 | public interface WeightInitializationStrategy { 41 | 42 | /** Initializes the vectors weights, prefWeights and prefWeightsNormFactors 43 | * with the help of the given parameters param. 44 | * Note that the memory for all vectors must be allocated before calling 45 | * initializeWeights! 46 | * 47 | * @param pref - The preference to be used for initialization. 
48 | * 49 | * @param weights - The output vector - "real" weight vector used for the 50 | * FolkRank computation; initialized in this method. 51 | * 52 | * @param prefWeights - The output vector which specifies for every 53 | * dimension how many preference weight each item in that dimension 54 | * gets. 55 | * 56 | * @param prefWeightsNormFactors - The output vector which specifies for 57 | * every dimension the value the additional preferences have to be divided 58 | * with such that everything sums up to one. 59 | */ 60 | public abstract void initalizeWeights( 61 | FolkRankPref pref, 62 | double[][] weights, 63 | double[] prefWeights, 64 | double[] prefWeightsNormFactors); 65 | 66 | } -------------------------------------------------------------------------------- /src/processing/folkrank/WikipediaFactReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | TagRecommender: 3 | A framework to implement and evaluate algorithms for the recommendation 4 | of tags. 5 | Copyright (C) 2013 Dominik Kowald 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU Affero General Public License as 9 | published by the Free Software Foundation, either version 3 of the 10 | License, or (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Affero General Public License for more details. 16 | 17 | You should have received a copy of the GNU Affero General Public License 18 | along with this program. If not, see . 
19 | */ 20 | 21 | package processing.folkrank; 22 | 23 | import common.Bookmark; 24 | import file.BookmarkReader; 25 | 26 | public class WikipediaFactReader implements FactReader { 27 | 28 | private BookmarkReader reader; 29 | private int trainSize; 30 | private int lineIndex; 31 | private int tagIndex; 32 | private int noOfDimensions; 33 | 34 | public WikipediaFactReader (BookmarkReader reader, int trainSize, int noOfDimensions) { 35 | this.noOfDimensions = noOfDimensions; 36 | this.reader = reader; 37 | this.trainSize = trainSize; 38 | this.lineIndex = 0; 39 | this.tagIndex = -1; 40 | } 41 | 42 | public String[] getFact() throws FactReadingException { 43 | Bookmark data = this.reader.getBookmarks().get(this.lineIndex); 44 | String[] fact = new String[this.noOfDimensions]; 45 | 46 | fact[0] = data.getTags().get(this.tagIndex).toString(); 47 | fact[1] = Integer.toString(data.getUserID()); 48 | fact[2] = Integer.toString(data.getResourceID()); 49 | 50 | return fact; 51 | } 52 | 53 | public boolean hasNext() throws FactReadingException { 54 | if (this.lineIndex < this.trainSize) { 55 | Bookmark data = this.reader.getBookmarks().get(this.lineIndex); 56 | if (++this.tagIndex < data.getTags().size()) { 57 | return true; 58 | } else { 59 | this.tagIndex = 0; 60 | while (true) { // go to the next line with tags 61 | if (++this.lineIndex < this.trainSize) { 62 | if (this.reader.getBookmarks().get(this.lineIndex).getTags().size() > 0) { 63 | return true; 64 | } else { 65 | continue; 66 | } 67 | } else { 68 | break; 69 | } 70 | } 71 | } 72 | } 73 | return false; 74 | } 75 | 76 | public void reset() throws FactReadingException { 77 | this.lineIndex = 0; 78 | this.tagIndex = -1; 79 | } 80 | 81 | public int getNoOfDimensions() throws FactReadingException { 82 | return this.noOfDimensions; 83 | } 84 | 85 | public void close() throws FactReadingException { 86 | reset(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- 
/src/processing/hashtag/TagRecommendationUtil.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag; 2 | 3 | import java.util.Map; 4 | import java.util.TreeMap; 5 | import common.DoubleMapComparatorGeneric; 6 | 7 | public class TagRecommendationUtil { 8 | /** 9 | * Sorted Map values ascending to descending. 10 | * 11 | * @param resultMap 12 | * @return 13 | */ 14 | public static Map getSortedMap(Map resultMap) { 15 | System.out.println("result Map without sort >> " + resultMap); 16 | Map sortedResultMap = new TreeMap(new DoubleMapComparatorGeneric(resultMap)); 17 | sortedResultMap.putAll(resultMap); 18 | System.out.println("resultMap >> " + sortedResultMap); 19 | return sortedResultMap; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/processing/hashtag/baseline/CosineSimilarityCalculator.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.baseline; 2 | 3 | /** 4 | * @author spujari 5 | */ 6 | public class CosineSimilarityCalculator{ 7 | 8 | /** 9 | * Get the cosine similarity for the vectors. 
10 | * @param v1 11 | * @param v2 12 | * @return 13 | */ 14 | public static double getCosineSimilarity(Vector v1, Vector v2) { 15 | double similarity = 0d; 16 | double absValueVector1 = VectorUtil.getVectorAbsValue(v1); 17 | double absValueVector2 = VectorUtil.getVectorAbsValue(v2); 18 | if (absValueVector1 != 0 && absValueVector2 != 0) { 19 | similarity = VectorUtil.getVectorDotProduct(v1, v2) / (absValueVector1 * absValueVector2); 20 | } 21 | return similarity; 22 | } 23 | 24 | 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/processing/hashtag/baseline/HashtagEntropyCalculatorTest.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.baseline; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | 6 | import org.junit.After; 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | public class HashtagEntropyCalculatorTest { 11 | 12 | HashMap>> userTagTimestamps; 13 | 14 | @Before 15 | public void init(){ 16 | userTagTimestamps = initUserTagTimestamps(); 17 | } 18 | 19 | @Test 20 | public void computeAllHashtagEntropyMap(){ 21 | HashtagEntropyCalculator.computeAllHashtagEntropyMap(userTagTimestamps, 2); 22 | } 23 | 24 | @After 25 | public void destroy(){ 26 | 27 | } 28 | 29 | HashMap>> initUserTagTimestamps() { 30 | 31 | HashMap>> userTagTimestamps = new HashMap>>(); 32 | 33 | // adding hashmap for user 1 34 | userTagTimestamps.put("user1", new HashMap>()); 35 | userTagTimestamps.get("user1").put(1, new ArrayList()); 36 | userTagTimestamps.get("user1").get(1).add(1l); 37 | userTagTimestamps.get("user1").get(1).add(4l); 38 | userTagTimestamps.get("user1").get(1).add(6l); 39 | 40 | // adding user map for user 2 41 | userTagTimestamps.get("user1").put(2, new ArrayList()); 42 | userTagTimestamps.get("user1").get(2).add(2l); 43 | userTagTimestamps.get("user1").get(2).add(5l); 44 | 45 | userTagTimestamps.put("user2", new 
HashMap>()); 46 | userTagTimestamps.get("user2").put(1, new ArrayList()); 47 | userTagTimestamps.get("user2").get(1).add(3l); 48 | userTagTimestamps.get("user2").get(1).add(6l); 49 | 50 | userTagTimestamps.get("user2").put(2, new ArrayList()); 51 | userTagTimestamps.get("user2").get(2).add(3l); 52 | userTagTimestamps.get("user2").get(2).add(7l); 53 | 54 | userTagTimestamps.get("user2").put(3, new ArrayList()); 55 | userTagTimestamps.get("user2").get(3).add(8l); 56 | 57 | // adding hashmap for user 3 58 | userTagTimestamps.put("user3", new HashMap>()); 59 | userTagTimestamps.get("user3").put(1, new ArrayList()); 60 | userTagTimestamps.get("user3").get(1).add(5l); 61 | 62 | userTagTimestamps.get("user3").put(2, new ArrayList()); 63 | userTagTimestamps.get("user3").get(2).add(3l); 64 | userTagTimestamps.get("user3").get(2).add(5l); 65 | 66 | userTagTimestamps.put("user4", new HashMap>()); 67 | userTagTimestamps.get("user4").put(4, new ArrayList()); 68 | userTagTimestamps.get("user4").get(4).add(1l); 69 | 70 | userTagTimestamps.get("user4").put(5, new ArrayList()); 71 | userTagTimestamps.get("user4").get(5).add(1l); 72 | userTagTimestamps.get("user4").get(5).add(2l); 73 | 74 | return userTagTimestamps; 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/processing/hashtag/baseline/HashtagUtil.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.baseline; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import java.util.TreeMap; 7 | 8 | import common.DoubleMapComparator; 9 | import common.DoubleMapComparatorGeneric; 10 | 11 | public class HashtagUtil { 12 | 13 | /** 14 | * Convert User-tag-timestamps Map to tag-user-count Map. 
15 | * 16 | * @param userTagTimestamps 17 | * @return 18 | */ 19 | public static HashMap> getTagUserCount( 20 | HashMap>> userTagTimestamps) { 21 | HashMap> tagUserCount = new HashMap>(); 22 | for (String user : userTagTimestamps.keySet()) { 23 | if (userTagTimestamps.get(user) != null) { 24 | for (Integer tag : userTagTimestamps.get(user).keySet()) { 25 | if (userTagTimestamps.get(user).get(tag) != null) { 26 | if (!tagUserCount.containsKey(tag)) { 27 | tagUserCount.put(tag, new HashMap()); 28 | } 29 | tagUserCount.get(tag).put(user, userTagTimestamps.get(user).get(tag).size()); 30 | } 31 | } 32 | } 33 | } 34 | return tagUserCount; 35 | } 36 | 37 | /** 38 | * Get All user TFIDF score. 39 | * @return {@link HashMap} of the user and 40 | */ 41 | public static HashMap createAllUserTFIDFVector( 42 | HashMap>> userTagTimestamps, 43 | HashMap> tagUserCount) { 44 | 45 | System.out.print("All user TFIDF calculation starts >> "); 46 | HashMap allUserTFIDFVectorMap = new HashMap(); 47 | for (String userName : userTagTimestamps.keySet()) { 48 | System.out.println("TFIDF calculation going for user >> " + userName); 49 | Vector hashtagTFIDFVector = UserTFIDFVectorCalculator.createUserTFIDFVector(userTagTimestamps, tagUserCount, 50 | userName); 51 | System.out.println("TFDIDF vector for the user as >> " + hashtagTFIDFVector.getVector()); 52 | allUserTFIDFVectorMap.put(userName, hashtagTFIDFVector); 53 | } 54 | System.out.println("All user TFIDF calculation ends >> "); 55 | return allUserTFIDFVectorMap; 56 | } 57 | 58 | public static Map getSortedMap( Map resultMap) { 59 | Map sortedResultMap = new TreeMap(new DoubleMapComparator(resultMap)); 60 | sortedResultMap.putAll(resultMap); 61 | return sortedResultMap; 62 | } 63 | 64 | public static Map getSortedMapString( Map resultMap) { 65 | Map sortedResultMap = new TreeMap(new DoubleMapComparatorGeneric(resultMap)); 66 | sortedResultMap.putAll(resultMap); 67 | return sortedResultMap; 68 | } 69 | } 70 | 
/**
 * Computes a personalised similarity score between a target user and the
 * users among their friends that have used a given hashtag.
 */
public class PersonalisedSimilarityCalculator {

    /**
     * Sums the pairwise similarity between {@code user} and every user that is
     * contained in both {@code friendsArrayList} and {@code friends}.
     * The pairwise similarity of two users is the number of tags they share
     * divided by the product of their distinct-tag counts.
     *
     * @param user the target user
     * @param friendsArrayList first group of candidate users (per the
     *        original inline comment, the target user's network)
     * @param friends second group of candidate users (per the original inline
     *        comment, the users of the target hashtag)
     * @param userTagTimes user -> (tag -> list of usage timestamps)
     * @return the summed similarity; {@code 0.0} if any argument collection is
     *         {@code null}, the two groups do not overlap, or the target user
     *         has no tags in {@code userTagTimes}
     */
    public static double getPersonalisedSimilarity(String user, ArrayList<String> friendsArrayList,
            HashSet<String> friends, HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimes) {
        if (friendsArrayList == null || friends == null || userTagTimes == null) {
            return 0d;
        }
        // work on a copy so that neither argument collection is modified
        HashSet<String> intersectedUsers = new HashSet<String>(friendsArrayList);
        intersectedUsers.retainAll(friends);
        return findSumOfSimilarityScore(user, new ArrayList<String>(intersectedUsers), userTagTimes);
    }

    /**
     * Sums the similarity score between the target user and each candidate.
     * Candidates without an entry (or with an empty tag map) contribute nothing.
     *
     * @param targetUser the user the candidates are compared to
     * @param candidateUsers the users to compare with
     * @param userTagTimes user -> (tag -> list of usage timestamps)
     * @return the sum of the pairwise similarity scores
     */
    private static double findSumOfSimilarityScore(String targetUser, ArrayList<String> candidateUsers,
            HashMap<String, HashMap<Integer, ArrayList<Long>>> userTagTimes) {
        // The target's tag set does not change per candidate, so it is looked up once.
        // A target unknown to the map used to cause a NullPointerException here.
        HashMap<Integer, ArrayList<Long>> targetTags = userTagTimes.get(targetUser);
        if (targetTags == null || targetTags.isEmpty()) {
            return 0d;
        }
        Set<Integer> targetUserHashtag = targetTags.keySet();

        double similarityScoreValue = 0d;
        for (String candidateUser : candidateUsers) {
            HashMap<Integer, ArrayList<Long>> candidateTags = userTagTimes.get(candidateUser);
            if (candidateTags == null || candidateTags.isEmpty()) {
                continue;
            }
            // copy before retainAll: keySet() is a live view of the underlying map
            Set<Integer> intersection = new HashSet<Integer>(targetUserHashtag);
            intersection.retainAll(candidateTags.keySet());
            similarityScoreValue += computeSimilarityScore(intersection.size(), targetUserHashtag.size(),
                    candidateTags.size());
        }
        return similarityScoreValue;
    }

    /**
     * Computes {@code intersectionSize / (set1Size * set2Size)}.
     *
     * @param intersectionSize number of shared elements
     * @param set1Size size of the first set
     * @param set2Size size of the second set
     * @return the similarity score
     */
    private static double computeSimilarityScore(int intersectionSize, int set1Size, int set2Size) {
        // multiply as doubles: the int product overflows for large sets
        return (double) intersectionSize / ((double) set1Size * (double) set2Size);
    }

}
userTagTimestamps.get("user1").put(1, new ArrayList()); 44 | userTagTimestamps.get("user1").get(1).add(1l); 45 | userTagTimestamps.get("user1").get(1).add(2l); 46 | userTagTimestamps.get("user1").get(1).add(3l); 47 | 48 | // adding user map for user 2 49 | userTagTimestamps.get("user1").put(2, new ArrayList()); 50 | userTagTimestamps.get("user1").get(2).add(1l); 51 | userTagTimestamps.get("user1").get(2).add(2l); 52 | 53 | userTagTimestamps.put("user2", new HashMap>()); 54 | userTagTimestamps.get("user2").put(1, new ArrayList()); 55 | userTagTimestamps.get("user2").get(1).add(2l); 56 | userTagTimestamps.get("user2").get(1).add(3l); 57 | 58 | userTagTimestamps.get("user2").put(2, new ArrayList()); 59 | userTagTimestamps.get("user2").get(2).add(1l); 60 | userTagTimestamps.get("user2").get(2).add(2l); 61 | 62 | userTagTimestamps.get("user2").put(3, new ArrayList()); 63 | userTagTimestamps.get("user2").get(3).add(1l); 64 | 65 | // adding hashmap for user 3 66 | userTagTimestamps.put("user3", new HashMap>()); 67 | userTagTimestamps.get("user3").put(1, new ArrayList()); 68 | userTagTimestamps.get("user3").get(1).add(1l); 69 | 70 | userTagTimestamps.get("user3").put(2, new ArrayList()); 71 | userTagTimestamps.get("user3").get(2).add(1l); 72 | userTagTimestamps.get("user3").get(2).add(2l); 73 | 74 | userTagTimestamps.put("user4", new HashMap>()); 75 | userTagTimestamps.get("user4").put(4, new ArrayList()); 76 | userTagTimestamps.get("user4").get(4).add(1l); 77 | 78 | userTagTimestamps.get("user4").put(5, new ArrayList()); 79 | userTagTimestamps.get("user4").get(5).add(1l); 80 | userTagTimestamps.get("user4").get(5).add(2l); 81 | 82 | return userTagTimestamps; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/processing/hashtag/baseline/PersonalisedTFIDFCalculator.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.baseline; 2 | 3 | import 
java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.solr.common.util.Hash; 8 | 9 | import processing.hashtag.TagRecommendationUtil; 10 | 11 | /** 12 | * This class implements a version of collaborative filtering algorithm that 13 | * assign score to the hashtags and rank hashtags based on this score. The main 14 | * steps of the algorithm can be illustrated as follows. 15 | * 16 | * getHashTagScoreResultMap() - gives the score of all the hashtags in our 17 | * dataset. 18 | * 19 | * >> computeHashtagVectorMap() compute the TFIDF vector of hashtags for each 20 | * user >> A TFIDF vector for a user means a vector in which hashtags are 21 | * elements and a tfidf score associated with each hashtag. -- Get a map of 22 | * TFIDF vector for each user. -- 23 | * 24 | * @author spujari 25 | * 26 | */ 27 | public class PersonalisedTFIDFCalculator { 28 | 29 | private HashMap>> userTagTimestamps; 30 | private HashMap> tagUserCount; 31 | private HashMap allUserTFIDFVector; 32 | 33 | /** 34 | * Constructor. 35 | * 36 | * @param userTagTimestamps 37 | * @param tagUserCount 38 | */ 39 | public PersonalisedTFIDFCalculator(HashMap>> userTagTimestamps, 40 | HashMap> tagUserCount) { 41 | this.userTagTimestamps = userTagTimestamps; 42 | this.tagUserCount = tagUserCount; 43 | this.allUserTFIDFVector = HashtagUtil.createAllUserTFIDFVector(userTagTimestamps, tagUserCount); 44 | } 45 | 46 | public HashMap getAllUserTFIDFVector() { 47 | return allUserTFIDFVector; 48 | } 49 | 50 | /** 51 | * Get the result map of hashtag scores. 
52 | * 53 | * @param user 54 | * @param friends 55 | * @return 56 | */ 57 | public Map getHashTagScoreResultMap(String user, ArrayList friends, 58 | HashMap allUserTFIDFVector) { 59 | System.out.println("Computation going for user >> " + user + " number of friends >> " + friends); 60 | Map resultMap = new HashMap(); 61 | Vector targetUserVector = this.allUserTFIDFVector.get(user); 62 | Map userSimScoreMap = new HashMap(); 63 | for (String friend : friends) { 64 | if (this.allUserTFIDFVector.containsKey(friend)) { 65 | Vector friendUserVector = this.allUserTFIDFVector.get(friend); 66 | double simScore = CosineSimilarityCalculator.getCosineSimilarity(targetUserVector, friendUserVector); 67 | if(simScore > 0){ 68 | userSimScoreMap.put(friend, simScore); 69 | } 70 | } 71 | } 72 | //userSimScoreMap = TagRecommendationUtil.getSortedMap(userSimScoreMap); 73 | //userSimScoreMap = choseKFromSortedMap(userSimScoreMap, 100); 74 | resultMap = computeHashtagScoreMapFromUserScoreMap(userSimScoreMap); 75 | //resultMap = TagRecommendationUtil.getSortedMap(resultMap); 76 | //System.out.println("print sorted result map >> " + resultMap); 77 | return resultMap; 78 | } 79 | 80 | /** 81 | * Compute Hashtag Score. 
82 | * 83 | * @param userSimScoreMap 84 | * @return 85 | */ 86 | private HashMap computeHashtagScoreMapFromUserScoreMap(Map userSimScoreMap) { 87 | //System.out.println("user sim score map >> " + userSimScoreMap); 88 | userSimScoreMap = new HashMap(userSimScoreMap); 89 | HashMap hashTagScore = new HashMap(); 90 | for (String user : userSimScoreMap.keySet()) { 91 | if (userTagTimestamps.containsKey(user)) { 92 | for (Integer tag : userTagTimestamps.get(user).keySet()) { 93 | if (hashTagScore.containsKey(tag)) { 94 | //System.out.println(" tag >> " + tag); 95 | //System.out.println(" user >> " + user); 96 | //System.out.println(" Hashtag score >> " + hashTagScore.get(tag)); 97 | //System.out.println(" userSimScoreMap score >> " + userSimScoreMap.get(user)); 98 | if (hashTagScore.get(tag) < userSimScoreMap.get(user) && userSimScoreMap.get(user) > 0) { 99 | hashTagScore.put(tag, userSimScoreMap.get(user)); 100 | } 101 | } else { 102 | if(userSimScoreMap.get(user) > 0){ 103 | hashTagScore.put(tag, userSimScoreMap.get(user)); 104 | } 105 | } 106 | } 107 | } 108 | } 109 | return hashTagScore; 110 | } 111 | 112 | /** 113 | * Chose highest K integers from the map. 
114 | * 115 | * @param sortedMap 116 | * @param k 117 | * @return 118 | */ 119 | private Map choseKFromSortedMap(Map sortedMap, int k) { 120 | HashMap chosenKSortedMap = new HashMap(); 121 | int count = 0; 122 | HashMap map = new HashMap(sortedMap); 123 | for (String key : map.keySet()) { 124 | double sortedValue = map.get(key); 125 | chosenKSortedMap.put(key, sortedValue); 126 | if (count >= k) { 127 | break; 128 | } else { 129 | count += 1; 130 | } 131 | } 132 | return chosenKSortedMap; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/processing/hashtag/baseline/UserTFIDFVectorCalculator.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.baseline; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | 6 | /** 7 | * @author spujari 8 | * 9 | */ 10 | public class UserTFIDFVectorCalculator{ 11 | /** 12 | * Create a TFIDF vector for a user. 13 | * @param user 14 | * @return 15 | */ 16 | public static Vector createUserTFIDFVector(HashMap>> userTagTimestamps, 17 | HashMap> tagUserCount, String userName) { 18 | Vector vector = new Vector(); 19 | for (Integer tag : userTagTimestamps.get(userName).keySet()) { 20 | int totalNumberOfUsers = userTagTimestamps.size(); 21 | if (userTagTimestamps.get(userName).containsKey(tag)) { 22 | int tagUsageCountOfUser = userTagTimestamps.get(userName).get(tag).size(); 23 | System.out.println(" number of times tag used by user >> " + tagUsageCountOfUser + " >> " + " user using the tags >> " + tagUserCount.get(tag).keySet().size() + " total Number of Users >> " + totalNumberOfUsers); 24 | vector.getVector().put(tag, 25 | getHashTagTFIDFValue(tagUsageCountOfUser, tagUserCount.get(tag).keySet().size(), totalNumberOfUsers)); 26 | } 27 | } 28 | return vector; 29 | } 30 | 31 | /** 32 | * Get TFIDF value for a Hashtag. 
/**
 * Sparse numeric vector: a mutable mapping from a hashtag id to its weight.
 */
public class Vector {

    /** Weight per hashtag id; replaced wholesale by {@link #setVector(HashMap)}. */
    private HashMap<Integer, Double> vector = new HashMap<Integer, Double>();

    /** Creates a vector without any components. */
    public Vector() {
    }

    /**
     * @return the backing map itself (not a copy), so changes made to it affect this vector
     */
    public HashMap<Integer, Double> getVector() {
        return this.vector;
    }

    /**
     * @param vector the map to use as backing storage from now on
     */
    public void setVector(HashMap<Integer, Double> vector) {
        this.vector = vector;
    }
}
7 | * @param vector 8 | * @return 9 | */ 10 | public static double getVectorAbsValue(Vector vector) { 11 | double vectorAbsValue = 0; 12 | for (Integer hashtag : vector.getVector().keySet()) { 13 | vectorAbsValue += (vector.getVector().get(hashtag) * vector.getVector().get(hashtag)); 14 | } 15 | if(vectorAbsValue != 0){ 16 | vectorAbsValue = Math.sqrt(vectorAbsValue); 17 | } 18 | return vectorAbsValue; 19 | } 20 | 21 | /** 22 | * Vector dot product. 23 | * @param v1 24 | * @param v2 25 | * @return 26 | */ 27 | public static double getVectorDotProduct(Vector v1, Vector v2) { 28 | double dotProduct = 0d; 29 | for (Integer key : v1.getVector().keySet()) { 30 | if (v2.getVector().containsKey(key)) { 31 | dotProduct += v1.getVector().get(key) * v2.getVector().get(key); 32 | } 33 | } 34 | return dotProduct; 35 | } 36 | 37 | public static void main(String[] args){ 38 | Vector vector1 = new Vector(); 39 | vector1.getVector().put(1, 3.0); 40 | vector1.getVector().put(2, 4.0); 41 | //System.out.println(" Vector abs values >> " + VectorUtil.getVectorAbsValue(vector1)); 42 | Vector vector2 = new Vector(); 43 | vector2.getVector().put(1, 5.0); 44 | vector2.getVector().put(2, 4.0); 45 | //System.out.println(" Cosine sim >> " + CosineSimilarityCalculator.getCosineSimilarity(vector1, vector2)); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/processing/hashtag/baseline/VectorUtilTest.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.baseline; 2 | 3 | import org.junit.Test; 4 | import junit.framework.Assert; 5 | 6 | 7 | /** 8 | * 9 | * @author spujari 10 | * 11 | */ 12 | public class VectorUtilTest { 13 | 14 | @Test 15 | public void calculateVectorSimilarityTest(){ 16 | Assert.assertEquals(true, true); 17 | } 18 | 19 | @Test 20 | public void calculateSimScoreTest(){ 21 | Vector v1 = new Vector(); 22 | Vector v2 = new Vector(); 23 | double dotproduct = 
/**
 * Plain data holder for one relation between two users: who initiated it, whom it
 * targets, the tweet it belongs to and when it was created.
 *
 * @author spujari
 */
public class RelationItem {

    private int id;
    private long tweetId;
    private String initUser;
    private String targetUser;
    private Date createdAt;

    /** @return the id of this relation item */
    public int getId() {
        return this.id;
    }

    /** @param id the id of this relation item */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the user the relation points to */
    public String getTargetUser() {
        return this.targetUser;
    }

    /** @param target_user the user the relation points to */
    public void setTargetUser(String target_user) {
        this.targetUser = target_user;
    }

    /** @return the user the relation originates from */
    public String getInitUser() {
        return this.initUser;
    }

    /** @param init_user the user the relation originates from */
    public void setInitUser(String init_user) {
        this.initUser = init_user;
    }

    /** @return the id of the associated tweet */
    public long getTweetId() {
        return this.tweetId;
    }

    /** @param tweetId the id of the associated tweet */
    public void setTweetId(long tweetId) {
        this.tweetId = tweetId;
    }

    /** @return the creation date; the same {@link Date} instance that was set */
    public Date getCreatedAt() {
        return this.createdAt;
    }

    /** @param createdAt the creation date; stored without copying */
    public void setCreatedAt(Date createdAt) {
        this.createdAt = createdAt;
    }
}
SocialBLLCalculator(HashMap>> userTagTimes, 18 | HashMap> network, List users) { 19 | this.userTagTimes = userTagTimes; 20 | this.network = network; 21 | this.users = users; 22 | } 23 | 24 | /** 25 | * Social BLL 26 | * 27 | * @param userID 28 | * @param timesString 29 | * {@link Long} 30 | * @param exponentSocial 31 | * {@link Double} 32 | * @return {@link Map} hashtag ids to the tag weight. 33 | **/ 34 | public Map getRankedTagListSocial(int userID, Long timesString, double exponentSocial) { 35 | String user = this.users.get(userID); 36 | List friendList = network.get(user); 37 | HashMap tagRank = new HashMap(); 38 | 39 | if (friendList == null) { 40 | return tagRank; 41 | } 42 | 43 | for (String friend : friendList) { 44 | HashMap> tagTimestampMap = userTagTimes.get(friend); 45 | if (tagTimestampMap != null) { 46 | for (Integer tag : tagTimestampMap.keySet()) { 47 | ArrayList timestampList = tagTimestampMap.get(tag); 48 | // is there a timestamp less than the given timestamp 49 | for (Long timestampLong : timestampList) { 50 | 51 | if (timesString > timestampLong) { 52 | long duration = timesString - timestampLong; 53 | 54 | if (tagRank.containsKey(tag)) { 55 | tagRank.put(tag, tagRank.get(tag) + Math.pow(duration, (-1) * (exponentSocial))); 56 | } else { 57 | tagRank.put(tag, Math.pow(duration, (-1) * (exponentSocial))); 58 | } 59 | } 60 | } 61 | 62 | } 63 | } 64 | } 65 | 66 | double denom = 0.0; 67 | if (tagRank != null) { 68 | for (Map.Entry entry : tagRank.entrySet()) { 69 | if (entry != null) { 70 | double actVal = Math.log(entry.getValue()); 71 | denom += Math.exp(actVal); 72 | entry.setValue(actVal); 73 | } 74 | } 75 | for (Map.Entry entry : tagRank.entrySet()) { 76 | if (entry != null) { 77 | double actVal = Math.exp(entry.getValue()); 78 | entry.setValue(actVal / denom); 79 | } 80 | } 81 | } 82 | Map sortedResultMap = new TreeMap(new DoubleMapComparator(tagRank)); 83 | sortedResultMap.putAll(tagRank); 84 | return sortedResultMap; 85 | 86 | } 87 | } 88 | 
-------------------------------------------------------------------------------- /src/processing/hashtag/social/SocialFrequencyCalculator.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.social; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.TreeMap; 8 | 9 | import common.DoubleMapComparator; 10 | 11 | public class SocialFrequencyCalculator { 12 | 13 | private HashMap>> userTagTimes; 14 | private HashMap> network; 15 | private List users; 16 | 17 | public SocialFrequencyCalculator(HashMap>> userTagTimes, 18 | HashMap> network, List users) { 19 | this.userTagTimes = userTagTimes; 20 | this.network = network; 21 | this.users = users; 22 | } 23 | 24 | /** 25 | * The tag weight is calculated based on how frequently a tag have been used by a user. 26 | * 27 | * @param userID 28 | * {@link Integer} 29 | * @param timesString 30 | * {@link Long} 31 | * @param sort 32 | * {@link Boolean} 33 | */ 34 | public Map getRankedTagListSocialFrequency(int userID, Long timesString, boolean sort) { 35 | String user = this.users.get(userID); 36 | List friendList = network.get(user); 37 | HashMap tagRank = new HashMap(); 38 | if (friendList == null) { 39 | return tagRank; 40 | } 41 | for (String friend : friendList) { 42 | HashMap> tagTimestampMap = userTagTimes.get(friend); 43 | if (tagTimestampMap != null) { 44 | for (Integer tag : tagTimestampMap.keySet()) { 45 | ArrayList timestampList = tagTimestampMap.get(tag); 46 | // is there a timestamp less than the given timestamp 47 | for (Long timestampLong : timestampList) { 48 | 49 | if (timesString > timestampLong) { 50 | long duration = timesString - timestampLong; 51 | 52 | if (tagRank.containsKey(tag)) { 53 | tagRank.put(tag, tagRank.get(tag) + Math.pow(duration, 1.0)); 54 | } else { 55 | tagRank.put(tag, 1.0); 56 | } 57 | } 58 | } 59 | 60 | } 61 | } 62 | } 63 | 64 | double 
denom = 0.0; 65 | if (tagRank != null) { 66 | for (Map.Entry entry : tagRank.entrySet()) { 67 | if (entry != null) { 68 | double actVal = Math.log(entry.getValue()); 69 | denom += Math.exp(actVal); 70 | entry.setValue(actVal); 71 | } 72 | } 73 | for (Map.Entry entry : tagRank.entrySet()) { 74 | if (entry != null) { 75 | double actVal = Math.exp(entry.getValue()); 76 | entry.setValue(actVal / denom); 77 | } 78 | } 79 | } 80 | 81 | if (sort) { 82 | Map sortedResultMap = new TreeMap(new DoubleMapComparator(tagRank)); 83 | sortedResultMap.putAll(tagRank); 84 | return sortedResultMap; 85 | } else { 86 | return tagRank; 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/processing/hashtag/social/SocialInitEngine.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.social; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.util.ArrayList; 10 | import java.util.HashMap; 11 | import java.util.List; 12 | 13 | import common.Bookmark; 14 | 15 | /** 16 | * @author spujari 17 | * This class init the main datastructures used in program. 18 | */ 19 | public class SocialInitEngine { 20 | 21 | /** 22 | * Get the underlying network over which the information flow. 
23 | * @param filepath 24 | * @param nameIdMap 25 | * @return HashMap name and friends as 26 | **/ 27 | public static HashMap> getNetwork(String filepath, HashMap nameIdMap) { 28 | HashMap> network = new HashMap>(); 29 | try { 30 | File file = new File(filepath); 31 | InputStreamReader reader = new InputStreamReader(new FileInputStream(file), "UTF8"); 32 | BufferedReader br = new BufferedReader(reader); 33 | String line = ""; 34 | while ((line = br.readLine()) != null) { 35 | String[] tokens = line.split("\t"); 36 | String user1 = tokens[0]; 37 | String user2 = tokens[1]; 38 | if (!network.containsKey(user1)) { 39 | network.put(user1, new ArrayList()); 40 | network.get(user1).add(user2); 41 | } else { 42 | network.get(user1).add(user2); 43 | } 44 | } 45 | } catch (FileNotFoundException e) { 46 | System.out.println("Cannot find the network file: " + filepath + "\n"); 47 | } catch (IOException e) { 48 | System.out.println("Error in reading from the network file: " + filepath + "\n"); 49 | } 50 | return network; 51 | } 52 | 53 | /** 54 | * The user tags and their timeline information. 55 | * @param bookmarkList 56 | * @return 57 | */ 58 | public static HashMap>> getBookmarks(List bookmarkList, List users) { 59 | HashMap>> userTagTimes = new HashMap>>(); 60 | for (Bookmark bookmark : bookmarkList) { 61 | List taglist = bookmark.getTags(); 62 | Integer userId = bookmark.getUserID(); 63 | // get the userName for the id. 
64 | String userName = users.get(userId); 65 | // userName = this.twitterScreenNameIdMap.get(userName); 66 | String timestamp = bookmark.getTimestamp(); 67 | Long timestampLong = bookmark.getTimestampAsLong(); 68 | if (!userTagTimes.containsKey(userName)) { 69 | userTagTimes.put(userName, new HashMap>()); 70 | } 71 | for (Integer tag : taglist) { 72 | if (!userTagTimes.get(userName).containsKey(tag)) { 73 | userTagTimes.get(userName).put(tag, new ArrayList()); 74 | } 75 | userTagTimes.get(userName).get(tag).add(timestampLong); 76 | } 77 | } 78 | return userTagTimes; 79 | } 80 | 81 | /** 82 | * Takes a list of username string with user id as index of the list. 83 | * 84 | * @param idNameMap 85 | * @return {@link HashMap} the map from name of user to his id in bookmark 86 | * system. 87 | */ 88 | public static HashMap getNameIdMap(List idNameMap) { 89 | 90 | HashMap nameIdMap = new HashMap(); 91 | for (int i = 0; i < idNameMap.size(); i++) { 92 | nameIdMap.put(idNameMap.get(i), i); 93 | } 94 | return nameIdMap; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/processing/hashtag/solr/CFSolrHashtagCalculator.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.solr; 2 | 3 | import java.io.File; 4 | import java.util.ArrayList; 5 | import java.util.LinkedHashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import com.google.common.primitives.Ints; 10 | 11 | import common.Bookmark; 12 | import common.MapUtil; 13 | import file.BookmarkReader; 14 | import file.PredictionFileWriter; 15 | import file.ResultSerializer; 16 | import processing.CFTagRecommender; 17 | 18 | public class CFSolrHashtagCalculator { 19 | 20 | public static void predictSample(String dir, String filename, int trainSize, double betaCB, String solrUrl, String solrCore) { 21 | BookmarkReader reader = new BookmarkReader(0, false); 22 | reader.readFile(filename); 23 | Map> 
contentBasedValues = null; 24 | if (solrUrl != null && solrCore != null) { 25 | if (new File("./data/results/" + dir + "/" + solrCore + "_cbpredictions.ser").exists()) { 26 | System.out.println("Found cb file ..."); 27 | contentBasedValues = SolrHashtagCalculator.deSerializeHashtagPrediction("./data/results/" + dir + "/" + solrCore + "_cbpredictions.ser"); 28 | } else { 29 | System.out.println("Did not find cb file ..."); 30 | contentBasedValues = SolrHashtagCalculator.getNormalizedHashtagPredictions(dir, solrCore, solrUrl, reader, null); 31 | } 32 | System.out.println("Number of content-based recommendations: " + contentBasedValues.size()); 33 | } 34 | 35 | reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size())); 36 | List testLines = reader.getTestLines(); 37 | CFTagRecommender calculator = new CFTagRecommender(reader, trainSize, true, false, 5); 38 | List> resultValues = new ArrayList>(); 39 | Map> hashtagMaps = new LinkedHashMap>(); 40 | 41 | for (Bookmark b : testLines) { 42 | // get cf Tags 43 | if (contentBasedValues.containsKey(b.getUserID())) { 44 | Map cfTags = calculator.getRankedTagList(b.getUserID(), b.getResourceID(), false); 45 | MapUtil.normalizeMap(cfTags); 46 | if (cfTags != null && cfTags.entrySet() != null) { 47 | for (Map.Entry cfEntry : cfTags.entrySet()) { 48 | if (cfEntry != null && cfEntry.getKey() != null) { 49 | cfEntry.setValue(betaCB * cfEntry.getValue()); 50 | } 51 | } 52 | } 53 | 54 | Map contentMap = contentBasedValues.get(b.getUserID()); 55 | if (contentMap != null && contentMap.entrySet() != null) { 56 | for (Map.Entry contentEntry : contentMap.entrySet()) { 57 | if (contentEntry != null && contentEntry.getKey() != null) { 58 | Double cfVal = cfTags.get(contentEntry.getKey()); 59 | double contentVal = (1.0 - betaCB) * contentEntry.getValue(); 60 | cfTags.put(contentEntry.getKey(), cfVal == null ? 
contentVal : cfVal.doubleValue() + contentVal); 61 | } 62 | } 63 | } 64 | 65 | Map sortedMap = MapUtil.sortByValue(cfTags); 66 | resultValues.add(sortedMap); 67 | hashtagMaps.put(b.getUserID(), sortedMap); 68 | } else { 69 | // ignore all entries where no content-based recommendations where found 70 | resultValues.add(null); 71 | } 72 | } 73 | 74 | List predictionValues = new ArrayList(); 75 | if (resultValues != null) { 76 | for (int i = 0; i < resultValues.size(); i++) { 77 | Map resultMap = resultValues.get(i); 78 | if (resultMap != null && resultMap.keySet() != null) { 79 | predictionValues.add(Ints.toArray(resultMap.keySet())); 80 | } else { 81 | predictionValues.add(null); 82 | } 83 | } 84 | } 85 | 86 | ResultSerializer.serializePredictions(hashtagMaps, "./data/results/" + dir + "/" + solrCore + "_cbcfpredictions.ser"); 87 | PredictionFileWriter writer = new PredictionFileWriter(reader, predictionValues); 88 | writer.writeFile(filename + "_cf_cb_" + betaCB); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/processing/hashtag/solr/Tweet.java: -------------------------------------------------------------------------------- 1 | package processing.hashtag.solr; 2 | 3 | import java.io.Serializable; 4 | import java.util.HashSet; 5 | import java.util.LinkedHashSet; 6 | import java.util.Set; 7 | 8 | public class Tweet implements Serializable { 9 | 10 | private String id; 11 | private String userid; 12 | private String text; 13 | private String timestamp; 14 | private Set hashtags = new LinkedHashSet(); 15 | 16 | public Tweet(String id, String userid, String text, String timestamp, Set hashtags) { 17 | this.id = id; 18 | this.userid = userid; 19 | this.text = text; 20 | this.timestamp = timestamp; 21 | for (String ht : hashtags) { 22 | this.hashtags.add(ht.toLowerCase()); 23 | } 24 | } 25 | 26 | public String getId() { 27 | return id; 28 | } 29 | 30 | public void setId(String id) { 31 | this.id = id; 32 | } 33 | 
/**
 * Result of a SPEAR run: one score map for user expertise and one for resource quality.
 * Both maps are stored and handed out by reference and stay {@code null} until set.
 *
 * @author ilire.mavriqi
 */
public class SpearAlgorithmResult {

    private Map<Integer, Double> expertiseResult;
    private Map<Integer, Double> qualityResult;

    /** Creates a result with both score maps unset. */
    public SpearAlgorithmResult() {
    }

    /**
     * @param expertiseResult expertise scores
     * @param qualityResult quality scores
     */
    public SpearAlgorithmResult(Map<Integer, Double> expertiseResult,
            Map<Integer, Double> qualityResult) {
        this.qualityResult = qualityResult;
        this.expertiseResult = expertiseResult;
    }

    /** @return the expertise scores, or {@code null} if none were set */
    public Map<Integer, Double> getExpertiseResult() {
        return this.expertiseResult;
    }

    /** @param expertiseResult the expertise scores to store */
    public void setExpertiseResult(Map<Integer, Double> expertiseResult) {
        this.expertiseResult = expertiseResult;
    }

    /** @return the quality scores, or {@code null} if none were set */
    public Map<Integer, Double> getQualityResult() {
        return this.qualityResult;
    }

    /** @param qualityResult the quality scores to store */
    public void setQualityResult(Map<Integer, Double> qualityResult) {
        this.qualityResult = qualityResult;
    }
}
userrecommender; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.HashMap; 8 | import java.util.LinkedList; 9 | import java.util.Map; 10 | import java.util.TreeMap; 11 | import java.util.TreeSet; 12 | import common.Bookmark; 13 | import common.DoubleMapComparator; 14 | import file.BookmarkReader; 15 | 16 | /** 17 | * Spear Algorithm wrapper 18 | * 19 | * @author ilire.mavriqi 20 | * 21 | */ 22 | 23 | public class SpearCalculator { 24 | 25 | private static BookmarkReader reader = new BookmarkReader(0, false); 26 | 27 | public static void calculateScores(String inputDataFileName, String outputFileName, int numberOfIterations) { 28 | 29 | reader.readFile(inputDataFileName); 30 | LinkedList userList = getUsers(); 31 | 32 | HashMap> activities = getActivities(); 33 | 34 | LinkedList uniqueResources = new LinkedList(); 35 | uniqueResources.addAll(activities.keySet()); 36 | 37 | SpearAlgorithm alg = new SpearAlgorithm(activities, userList, uniqueResources); 38 | 39 | SpearAlgorithmResult result = alg.execute(numberOfIterations); 40 | 41 | // sort results 42 | Map sortedExpertiseScores = new TreeMap(new DoubleMapComparator(result.getExpertiseResult())); 43 | sortedExpertiseScores.putAll(result.getExpertiseResult()); 44 | 45 | Map sortedResourceScores = new TreeMap(new DoubleMapComparator(result.getQualityResult())); 46 | sortedResourceScores.putAll(result.getQualityResult()); 47 | 48 | try { 49 | FileWriter writerUsers = new FileWriter(new File("./data/results/spear/" + outputFileName + "_users.txt")); 50 | BufferedWriter bwUsers = new BufferedWriter(writerUsers); 51 | String userResults = "User | Expertise Score"; 52 | userResults += "\n"; 53 | userResults += "---------------------------"; 54 | userResults += "\n"; 55 | for (Map.Entry entry : sortedExpertiseScores.entrySet()) { 56 | userResults += entry.getKey() + " | " + entry.getValue(); 57 | //System.out.println ("Experts 
results: User with ID : " + entry.getKey() + " got the score => " +entry.getValue()); 58 | userResults += "\n"; 59 | } 60 | bwUsers.write(userResults); 61 | bwUsers.flush(); 62 | bwUsers.close(); 63 | writerUsers.close(); 64 | } catch (IOException e) { 65 | e.printStackTrace(); 66 | } 67 | 68 | try { 69 | FileWriter writerResources = new FileWriter(new File("./data/results/spear/" + outputFileName + "_resources.txt")); 70 | BufferedWriter bwResources = new BufferedWriter(writerResources); 71 | String resourceResults = "Resource | Quality Score"; 72 | resourceResults += "\n"; 73 | resourceResults += "---------------------------"; 74 | resourceResults += "\n"; 75 | for (Map.Entry entry : sortedResourceScores.entrySet()) { 76 | resourceResults += entry.getKey() + " | " +entry.getValue(); 77 | resourceResults += "\n"; 78 | //System.out.println ("Resource results: Resource with ID : " + entry.getKey() + " got the score => " +entry.getValue()); 79 | } 80 | bwResources.write(resourceResults); 81 | bwResources.flush(); 82 | bwResources.close(); 83 | writerResources.close(); 84 | } catch (IOException e) { 85 | e.printStackTrace(); 86 | } 87 | } 88 | 89 | private static LinkedList getUsers() { 90 | LinkedList userList = new LinkedList(); 91 | for (int i = 0; i < reader.getUsers().size(); i++) { 92 | userList.add(Integer.parseInt(reader.getUsers().get(i))); 93 | } 94 | return userList; 95 | } 96 | 97 | private static HashMap> getActivities() { 98 | HashMap> activities = new HashMap>(); 99 | 100 | for (Bookmark bookmark : reader.getBookmarks()) { 101 | UserActivity activity = new UserActivity(); 102 | activity.setUserId(bookmark.getUserID()); 103 | activity.setResourceId(bookmark.getResourceID()); 104 | activity.setTimeStamp(Long.parseLong(bookmark.getTimestamp())); 105 | 106 | 107 | if (activities.containsKey(bookmark.getResourceID())) { 108 | //already contains values, get list, add new value 109 | TreeSet values = activities.get(bookmark.getResourceID()); 110 | 
values.add(activity); 111 | activities.put(bookmark.getResourceID(), values); 112 | 113 | } else { 114 | TreeSet userActivityTree = new TreeSet(); 115 | userActivityTree.add(activity); 116 | activities.put(bookmark.getResourceID(), userActivityTree); 117 | } 118 | } 119 | 120 | return activities; 121 | } 122 | 123 | } 124 | -------------------------------------------------------------------------------- /src/userrecommender/TestSpearCalculator.java: -------------------------------------------------------------------------------- 1 | package userrecommender; 2 | 3 | /** 4 | * Test the Spear Algorithm 5 | * 6 | * @author ilire.mavriqi 7 | * 8 | */ 9 | 10 | public class TestSpearCalculator { 11 | 12 | public static void main(String[] args) { 13 | 14 | SpearCalculator.calculateScores("spear/spear_test_data", "", 250); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/userrecommender/UserActivity.java: -------------------------------------------------------------------------------- 1 | package userrecommender; 2 | 3 | /** 4 | * User activity with a time stamp in milisec for a given resource. 
/**
 * One activity of a user on a resource, together with the time stamp at which it
 * happened. Activities are ordered by time stamp.
 *
 * @author ilire.mavriqi
 */
public class UserActivity implements Comparable<UserActivity> {

    private long timeStamp = 0;
    private int userId = 0;
    private int resourceId = 0;

    /** Creates an activity with all fields set to zero. */
    public UserActivity() {
        super();
    }

    /** @return the id of the resource */
    public int getResourceId() {
        return this.resourceId;
    }

    /** @param resourceId the id of the resource */
    public void setResourceId(int resourceId) {
        this.resourceId = resourceId;
    }

    /** @return the id of the user */
    public int getUserId() {
        return this.userId;
    }

    /** @param userId the id of the user */
    public void setUserId(int userId) {
        this.userId = userId;
    }

    /** @return the time stamp of the activity */
    public long getTimeStamp() {
        return this.timeStamp;
    }

    /** @param timeStamp the time stamp of the activity */
    public void setTimeStamp(long timeStamp) {
        this.timeStamp = timeStamp;
    }

    /**
     * Orders activities by time stamp.
     *
     * <p>This method never returns {@code 0}: an activity with an equal time stamp
     * (including this very instance) is reported as greater.
     *
     * @param activity the activity to compare with
     * @return {@code -1} if this activity is older than {@code activity}, {@code 1} otherwise
     */
    @Override
    public int compareTo(UserActivity activity) {
        return this.timeStamp < activity.getTimeStamp() ? -1 : 1;
    }
}