├── .gitignore ├── src ├── main │ ├── resources │ │ └── queries │ │ │ ├── TweeterLocation.txt │ │ │ ├── TweetsMentioningMe.txt │ │ │ ├── TweetGeoLocation.txt │ │ │ └── TweeterStatistic.txt │ └── java │ │ └── at │ │ └── jku │ │ └── semantic │ │ └── twitter │ │ ├── queries │ │ ├── FileTwitterQuery.java │ │ ├── GeoLocationQuery.java │ │ ├── TweetsMentioningMeQuery.java │ │ ├── TimezoneQuery.java │ │ ├── TwitterQuery.java │ │ └── TweeterStatisticQuery.java │ │ ├── Constants.java │ │ └── TwitterRDFExtractor.java └── test │ └── java │ └── at │ └── jku │ └── semantic │ └── twitter │ ├── TestExtractRDFDataFromTwitter.java │ └── TestQueries.java ├── README └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.classpath 3 | *.metadata 4 | *.settings 5 | *.project 6 | *target 7 | *twitter.properties 8 | *userModels -------------------------------------------------------------------------------- /src/main/resources/queries/TweeterLocation.txt: -------------------------------------------------------------------------------- 1 | PREFIX stweeter: 2 | SELECT ?name ?location ?timezone 3 | WHERE { 4 | ?x stweeter:nick ?name . 5 | OPTIONAL { 6 | ?x stweeter:location ?location . 7 | ?x stweeter:timezone ?timezone . 8 | } 9 | } -------------------------------------------------------------------------------- /src/main/resources/queries/TweetsMentioningMe.txt: -------------------------------------------------------------------------------- 1 | PREFIX stweeter: 2 | PREFIX stweet: 3 | SELECT ?tweeterNick ?tweet 4 | WHERE { 5 | ?tweeter stweeter:hasTweet ?tweet . 6 | ?tweeter stweeter:nick ?tweeterNick . 7 | ?tweet stweet:mentionedUser ?me . 8 | FILTER regex(str(?me), "pangratz") 9 | } -------------------------------------------------------------------------------- /src/main/resources/queries/TweetGeoLocation.txt: -------------------------------------------------------------------------------- 1 | PREFIX stweeter: 2 | PREFIX stweet: 3 | SELECT ?nick ?tweet ?loc 4 | WHERE { 5 | ?tweeter stweeter:nick ?nick . 6 | ?tweeter stweeter:hasTweet ?tweet . 7 | ?tweet stweet:hasGeoLocation ?hasLoc . 8 | ?tweet stweet:geoLocation ?loc 9 | FILTER ( ?hasLoc = true ) 10 | } -------------------------------------------------------------------------------- /src/main/resources/queries/TweeterStatistic.txt: -------------------------------------------------------------------------------- 1 | PREFIX stweeter: 2 | SELECT ?name ?followerCount ?friendsCount ?favoritesCount ?statusCount 3 | WHERE { 4 | ?x stweeter:nick ?name . 5 | ?x stweeter:followerCount ?followerCount . 6 | ?x stweeter:friendsCount ?friendsCount . 7 | ?x stweeter:favoritesCount ?favoritesCount . 8 | ?x stweeter:statusCount ?statusCount . 9 | } -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | this project is the implementation for the semantic web course at jku. 2 | 3 | the idea behind is to grab your twitter followers and their folowers and do some analytic: 4 | 5 | * what is the mean/max/min number of followers 6 | * what is the mean/max/min tweets per time slot (day, week, year) 7 | * what the is the relationship between the people i am following and the people they are following; especially what are the most common friends 8 | * when are the most tweets (in the morning, afternoon, evening, night) 9 | * in which location are the most followers (europe (germany, austria, ...), us, asia, ...) 10 | * what is the percentage of retweets in proportion to "normal" tweets 11 | * what is the distribution of different types of tweets (link to videos, link to images, retweets, ...) -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/queries/FileTwitterQuery.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter.queries; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | 6 | import org.apache.commons.io.IOUtil; 7 | 8 | public abstract class FileTwitterQuery extends TwitterQuery { 9 | 10 | @Override 11 | protected String getQuery() { 12 | String fileName = getQueryFileName(); 13 | if (!fileName.startsWith("/queries/")) { 14 | fileName = "/queries/" + fileName; 15 | } 16 | InputStream inStream = FileTwitterQuery.class.getResourceAsStream(fileName); 17 | try { 18 | return IOUtil.toString(inStream); 19 | } catch (IOException e) { 20 | throw new IllegalStateException(e); 21 | } 22 | } 23 | 24 | protected abstract String getQueryFileName(); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/java/at/jku/semantic/twitter/TestExtractRDFDataFromTwitter.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter; 2 | 3 | import java.io.File; 4 | import java.util.Properties; 5 | 6 | import junit.framework.TestCase; 7 | 8 | public class TestExtractRDFDataFromTwitter extends TestCase { 9 | 10 | // set to false, if you want to extract all the data for each friend of 11 | // yours on twitter --> time consuming 12 | boolean ignore = true; 13 | 14 | public void testExtractRDFData() throws Exception { 15 | 16 | if (ignore) 17 | return; 18 | 19 | // read the OAuth keys/secrets from properties file 20 | Properties props = new Properties(); 21 | props.load(TwitterRDFExtractor.class.getResourceAsStream("/twitter.properties")); 22 | String consumerKey = props.getProperty("twitter.oauth.consumerKey"); 23 | String consumerSecret = props.getProperty("twitter.oauth.consumerSecret"); 24 | String token = props.getProperty("twitter.oauth.token"); 25 | String tokenSecret = props.getProperty("twitter.oauth.tokenSecret"); 26 | 27 | File dir = new File("userModels"); 28 | dir.mkdirs(); 29 | 30 | new TwitterRDFExtractor(dir, consumerKey, consumerSecret, token, tokenSecret); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/at/jku/semantic/twitter/TestQueries.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter; 2 | 3 | import java.io.File; 4 | 5 | import junit.framework.TestCase; 6 | import at.jku.semantic.twitter.queries.GeoLocationQuery; 7 | import at.jku.semantic.twitter.queries.TimezoneQuery; 8 | import at.jku.semantic.twitter.queries.TweeterStatisticQuery; 9 | import at.jku.semantic.twitter.queries.TweetsMentioningMeQuery; 10 | import at.jku.semantic.twitter.queries.TwitterQuery; 11 | 12 | import com.hp.hpl.jena.rdf.model.Model; 13 | 14 | public class TestQueries extends TestCase { 15 | 16 | private Model model; 17 | 18 | @Override 19 | protected void setUp() throws Exception { 20 | super.setUp(); 21 | 22 | model = TwitterQuery.loadModels(new File("userModels")); 23 | } 24 | 25 | @Override 26 | protected void tearDown() throws Exception { 27 | super.tearDown(); 28 | 29 | model = null; 30 | } 31 | 32 | public void testTweeterStatisticQuery() throws Exception { 33 | new TweeterStatisticQuery().executeQuery(model); 34 | } 35 | 36 | public void testTweetsMentioningMeQuery() throws Exception { 37 | new TweetsMentioningMeQuery().executeQuery(model); 38 | } 39 | 40 | public void testGeoLocationQuery() throws Exception { 41 | new GeoLocationQuery().executeQuery(model); 42 | } 43 | 44 | public void testTimezoneQuery() throws Exception { 45 | new TimezoneQuery().executeQuery(model); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/queries/GeoLocationQuery.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter.queries; 2 | 3 | import java.util.Set; 4 | 5 | import com.google.common.collect.Multiset.Entry; 6 | import com.google.common.collect.TreeMultiset; 7 | import com.hp.hpl.jena.query.QuerySolution; 8 | 9 | public class GeoLocationQuery extends FileTwitterQuery { 10 | 11 | private TreeMultiset tweeters; 12 | 13 | @Override 14 | protected String getDescription() { 15 | return "query all tweets with geo location and gather statistics"; 16 | } 17 | 18 | @Override 19 | protected String getQueryFileName() { 20 | return "TweetGeoLocation.txt"; 21 | } 22 | 23 | @Override 24 | protected String beforeQueryProcessing() { 25 | tweeters = TreeMultiset.create(); 26 | return null; 27 | } 28 | 29 | @Override 30 | protected String processQuerySolution(QuerySolution querySolution) { 31 | 32 | String nick = querySolution.getLiteral("nick").getString(); 33 | String location = querySolution.getLiteral("loc").getString(); 34 | 35 | tweeters.add(nick); 36 | 37 | return null; 38 | } 39 | 40 | @Override 41 | protected String afterQueryProcessing() { 42 | StringBuilder log = new StringBuilder(); 43 | 44 | Set> entries = tweeters.entrySet(); 45 | for (Entry entry : entries) { 46 | int count = entry.getCount(); 47 | String nick = entry.getElement(); 48 | 49 | log.append(nick).append(" has ").append(count).append(" tweets with geo location").append(NEW_LINE); 50 | } 51 | 52 | return log.toString(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/queries/TweetsMentioningMeQuery.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter.queries; 2 | 3 | import java.util.Iterator; 4 | import java.util.Set; 5 | 6 | import com.google.common.collect.Multiset.Entry; 7 | import com.google.common.collect.TreeMultiset; 8 | import com.hp.hpl.jena.query.QuerySolution; 9 | 10 | public class TweetsMentioningMeQuery extends FileTwitterQuery { 11 | 12 | private TreeMultiset tweetersMentioningMe; 13 | 14 | @Override 15 | protected String getQueryFileName() { 16 | return "TweetsMentioningMe.txt"; 17 | } 18 | 19 | @Override 20 | protected String getDescription() { 21 | return "get statistics of all tweeters who tweeted about you"; 22 | } 23 | 24 | @Override 25 | protected String beforeQueryProcessing() { 26 | tweetersMentioningMe = TreeMultiset.create(); 27 | return null; 28 | } 29 | 30 | @Override 31 | protected String processQuerySolution(QuerySolution querySolution) { 32 | 33 | // nick of the tweeter, who tweeted about me 34 | String tweeterNick = querySolution.get("tweeterNick").asLiteral().getString(); 35 | tweetersMentioningMe.add(tweeterNick); 36 | 37 | return null; 38 | } 39 | 40 | @Override 41 | protected String afterQueryProcessing() { 42 | StringBuilder log = new StringBuilder(); 43 | 44 | Set> entries = tweetersMentioningMe.entrySet(); 45 | Iterator> it = entries.iterator(); 46 | while (it.hasNext()) { 47 | Entry entry = it.next(); 48 | log.append(entry.getElement()).append(" tweeted "); 49 | log.append(entry.getCount()).append(" times about you"); 50 | log.append(NEW_LINE); 51 | } 52 | 53 | return log.toString(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | at.jku.semantic 6 | twitter 7 | 0.0.1-SNAPSHOT 8 | jar 9 | 10 | twitter 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | 20 | org.apache.maven.plugins 21 | maven-compiler-plugin 22 | 23 | 1.5 24 | 1.5 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | org.twitter4j 33 | twitter4j-core 34 | 2.1.12 35 | 36 | 37 | junit 38 | junit 39 | 3.8.1 40 | test 41 | 42 | 43 | com.hp.hpl.jena 44 | jena 45 | 2.6.3 46 | 47 | 48 | com.hp.hpl.jena 49 | arq 50 | 2.8.5 51 | 52 | 53 | commons-io 54 | commons-io 55 | 20030203.000550 56 | 57 | 58 | com.google.collections 59 | google-collections 60 | 1.0 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/queries/TimezoneQuery.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter.queries; 2 | 3 | import java.util.Iterator; 4 | import java.util.Set; 5 | 6 | import com.google.common.collect.Multiset.Entry; 7 | import com.google.common.collect.TreeMultiset; 8 | import com.hp.hpl.jena.query.QuerySolution; 9 | import com.hp.hpl.jena.rdf.model.RDFNode; 10 | 11 | public class TimezoneQuery extends FileTwitterQuery { 12 | 13 | private TreeMultiset timezones; 14 | 15 | public TimezoneQuery() { 16 | super(); 17 | 18 | } 19 | 20 | @Override 21 | protected String getQueryFileName() { 22 | return "TweeterLocation.txt"; 23 | } 24 | 25 | @Override 26 | protected String getDescription() { 27 | return "query all tweets and check the different time zones of your friends"; 28 | } 29 | 30 | @Override 31 | protected String beforeQueryProcessing() { 32 | timezones = TreeMultiset.create(); 33 | return null; 34 | } 35 | 36 | @Override 37 | protected String processQuerySolution(QuerySolution querySolution) { 38 | RDFNode timezoneNode = querySolution.get("timezone"); 39 | String timezone = (timezoneNode != null) ? timezoneNode.asLiteral().getString() : null; 40 | 41 | RDFNode nameNode = querySolution.get("name"); 42 | String name = (nameNode != null) ? nameNode.asLiteral().getString() : null; 43 | 44 | if (timezone == null) 45 | timezone = "null"; 46 | 47 | timezones.add(timezone); 48 | 49 | return null; 50 | } 51 | 52 | @Override 53 | protected String afterQueryProcessing() { 54 | StringBuilder log = new StringBuilder(); 55 | 56 | Set> entries = timezones.entrySet(); 57 | Iterator> it = entries.iterator(); 58 | while (it.hasNext()) { 59 | Entry entry = it.next(); 60 | String timezone = entry.getElement(); 61 | int count = entry.getCount(); 62 | 63 | if ("null".equals(timezone)) 64 | log.append(count).append(" users haven't defined a timezone").append(NEW_LINE); 65 | else 66 | log.append(count).append(" users are in ").append(timezone).append(NEW_LINE); 67 | } 68 | 69 | return log.toString(); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/Constants.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter; 2 | 3 | public interface Constants { 4 | 5 | public static final String FOAF_NS = "http://xmlns.com/foaf/0.1/Person#"; 6 | public static final String SEMANTIC_TWEETER_NS = "http://twitter.com/semantic/Tweeter#"; 7 | public static final String SEMANTIC_TWEET_NS = "http://twitter.com/semantic/Tweet#"; 8 | 9 | public static final String SEMANTIC_TWEETER_CLASS = "Tweeter"; 10 | public static final String SEMANTIC_TWEETER_NICK = "nick"; 11 | public static final String SEMANTIC_TWEETER_STATUS_COUNT = "statusCount"; 12 | public static final String SEMANTIC_TWEETER_LOCATION = "location"; 13 | public static final String SEMANTIC_TWEETER_FOLLOWER_COUNT = "followerCount"; 14 | public static final String SEMANTIC_TWEETER_FRIENDS_COUNT = "friendsCount"; 15 | public static final String SEMANTIC_TWEETER_FAVOURITES_COUNT = "favoritesCount"; 16 | public static final String SEMANTIC_TWEETER_UTC_OFFSET = "utcOffset"; 17 | public static final String SEMANTIC_TWEETER_TIMEZONE = "timezone"; 18 | public static final String SEMANTIC_TWEETER_HAS_TWEET = "hasTweet"; 19 | public static final String SEMANTIC_TWEETER_CREATED_AT_YEAR = "createdAtYear"; 20 | public static final String SEMANTIC_TWEETER_CREATED_AT_MONTH = "createdAtMonth"; 21 | public static final String SEMANTIC_TWEETER_CREATED_AT_DAY_OF_WEEK = "createdAtDayOfWeek"; 22 | 23 | public static final String SEMANTIC_TWEET_CLASS = "Tweet"; 24 | public static final String SEMANTIC_TWEET_IS_RETWEET = "isRetweet"; 25 | public static final String SEMANTIC_TWEET_STATUS_LENGHT = "statusLength"; 26 | public static final String SEMANTIC_TWEET_HAS_URLS = "hasUrls"; 27 | public static final String SEMANTIC_TWEET_URL = "url"; 28 | public static final String SEMANTIC_TWEET_HASH_TAG = "hashTag"; 29 | public static final String SEMANTIC_TWEET_GEO_LOCATION = "geoLocation"; 30 | public static final String SEMANTIC_TWEET_HAS_USER_MENTIONS = "hasUserMentions"; 31 | public static final String SEMANTIC_TWEET_MENTIONED_USER = "mentionedUser"; 32 | public static final String SEMANTIC_TWEET_YEAR = "tweetYear"; 33 | public static final String SEMANTIC_TWEET_MONTH = "tweetMonth"; 34 | public static final String SEMANTIC_TWEET_DAY_OF_WEEK = "tweetDayOfWeek"; 35 | public static final String SEMANTIC_TWEET_HAS_GEO_LOCATION = "hasGeoLocation"; 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/queries/TwitterQuery.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter.queries; 2 | 3 | import java.io.File; 4 | import java.io.FilenameFilter; 5 | 6 | import com.hp.hpl.jena.query.QueryExecution; 7 | import com.hp.hpl.jena.query.QueryExecutionFactory; 8 | import com.hp.hpl.jena.query.QuerySolution; 9 | import com.hp.hpl.jena.query.ResultSet; 10 | import com.hp.hpl.jena.rdf.model.Model; 11 | import com.hp.hpl.jena.rdf.model.ModelFactory; 12 | import com.hp.hpl.jena.util.FileManager; 13 | 14 | public abstract class TwitterQuery { 15 | 16 | protected static final String NEW_LINE = "\n"; 17 | 18 | public static final Model loadModels(File dir) { 19 | Model model = ModelFactory.createDefaultModel(); 20 | FileManager fileManager = FileManager.get(); 21 | File[] xmlFiles = dir.listFiles(new FilenameFilter() { 22 | public boolean accept(File dir, String name) { 23 | return name.endsWith(".xml"); 24 | } 25 | }); 26 | 27 | for (File modelFile : xmlFiles) { 28 | Model loadedModel = fileManager.loadModel(modelFile.toURI().toString()); 29 | model.add(loadedModel); 30 | } 31 | 32 | return model; 33 | } 34 | 35 | public final void executeQuery(Model model) { 36 | String query = getQuery(); 37 | QueryExecution queryExecution = QueryExecutionFactory.create(query, model); 38 | 39 | StringBuilder log = new StringBuilder("log for "); 40 | log.append(getDescription()).append(NEW_LINE); 41 | 42 | String before = beforeQueryProcessing(); 43 | if (before != null) 44 | log.append(" ###").append(NEW_LINE).append(before).append(NEW_LINE); 45 | 46 | String processed = processQueryResult(queryExecution.execSelect()); 47 | if (processed != null && processed.length() > 0) 48 | log.append(" ###").append(NEW_LINE).append(processed).append(NEW_LINE); 49 | 50 | String after = afterQueryProcessing(); 51 | if (after != null) 52 | log.append(" ###").append(NEW_LINE).append(after); 53 | 54 | System.out.println(log.toString()); 55 | 56 | queryExecution.close(); 57 | } 58 | 59 | protected String getDescription() { 60 | return "example query"; 61 | } 62 | 63 | protected String afterQueryProcessing() { 64 | return null; 65 | } 66 | 67 | protected String beforeQueryProcessing() { 68 | return null; 69 | } 70 | 71 | protected abstract String getQuery(); 72 | 73 | protected String processQueryResult(ResultSet result) { 74 | StringBuilder log = new StringBuilder(); 75 | while (result.hasNext()) { 76 | QuerySolution querySolution = result.next(); 77 | String tanga = processQuerySolution(querySolution); 78 | if (tanga != null) 79 | log.append(tanga).append(NEW_LINE); 80 | } 81 | log.trimToSize(); 82 | return log.toString(); 83 | } 84 | 85 | protected String processQuerySolution(QuerySolution querySolution) { 86 | return querySolution.toString(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/queries/TweeterStatisticQuery.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter.queries; 2 | 3 | import com.hp.hpl.jena.query.QuerySolution; 4 | import com.hp.hpl.jena.rdf.model.RDFNode; 5 | 6 | public class TweeterStatisticQuery extends FileTwitterQuery { 7 | 8 | private long userCount; 9 | private long followersCount, friendsCount, favouritesCount, statusCount; 10 | private long minFollowersCount, minFriendsCount, minFavouritesCount, minStatusCount; 11 | private long maxFollowersCount, maxFriendsCount, maxFavouritesCount, maxStatusCount; 12 | 13 | @Override 14 | protected String getQueryFileName() { 15 | return "TweeterStatistic.txt"; 16 | } 17 | 18 | @Override 19 | protected String getDescription() { 20 | return "get statistics (tweet count, follower count, ...) of your friends"; 21 | } 22 | 23 | @Override 24 | protected String processQuerySolution(QuerySolution querySolution) { 25 | 26 | // get all attributes 27 | long userFollowerCount = parseLong(querySolution.get("followerCount")); 28 | long userFriendsCount = parseLong(querySolution.get("friendsCount")); 29 | long userFavouritesCount = parseLong(querySolution.get("favoritesCount")); 30 | long userStatusCount = parseLong(querySolution.get("statusCount")); 31 | 32 | minFollowersCount = Math.min(minFollowersCount, userFollowerCount); 33 | minFriendsCount = Math.min(minFriendsCount, userFriendsCount); 34 | minFavouritesCount = Math.min(minFavouritesCount, userFavouritesCount); 35 | minStatusCount = Math.min(minStatusCount, userStatusCount); 36 | 37 | maxFollowersCount = Math.max(maxFollowersCount, userFollowerCount); 38 | maxFriendsCount = Math.max(maxFriendsCount, userFriendsCount); 39 | maxFavouritesCount = Math.max(maxFavouritesCount, userFavouritesCount); 40 | maxStatusCount = Math.max(maxStatusCount, userStatusCount); 41 | 42 | followersCount += userFollowerCount; 43 | friendsCount += userFriendsCount; 44 | favouritesCount += userFavouritesCount; 45 | statusCount += userStatusCount; 46 | userCount++; 47 | 48 | return null; 49 | } 50 | 51 | private long parseLong(RDFNode rdfNode) { 52 | String s = rdfNode.asLiteral().getString(); 53 | return Long.parseLong(s); 54 | } 55 | 56 | @Override 57 | protected String afterQueryProcessing() { 58 | long avgFollowersCount = followersCount / userCount; 59 | long avgFriendsCount = friendsCount / userCount; 60 | long avgFavouritesCount = favouritesCount / userCount; 61 | long avgStatusCount = statusCount / userCount; 62 | 63 | StringBuilder log = new StringBuilder(); 64 | log.append("there are following statistics for a tweeter: ").append(NEW_LINE); 65 | 66 | appendStats(log, "followers", minFollowersCount, avgFollowersCount, maxFollowersCount); 67 | log.append(NEW_LINE); 68 | appendStats(log, "friends", minFriendsCount, avgFriendsCount, maxFriendsCount); 69 | log.append(NEW_LINE); 70 | appendStats(log, "favourites", minFavouritesCount, avgFavouritesCount, maxFavouritesCount); 71 | log.append(NEW_LINE); 72 | appendStats(log, "statuses", minStatusCount, avgStatusCount, maxStatusCount); 73 | 74 | return log.toString(); 75 | } 76 | 77 | private void appendStats(StringBuilder sb, String name, long min, long avg, long max) { 78 | sb.append(name).append(" (min/avg/max) = (").append(min).append("/"); 79 | sb.append(avg).append("/").append(max).append(")"); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java/at/jku/semantic/twitter/TwitterRDFExtractor.java: -------------------------------------------------------------------------------- 1 | package at.jku.semantic.twitter; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.net.URL; 6 | import java.util.Calendar; 7 | import java.util.Date; 8 | import java.util.Iterator; 9 | 10 | import twitter4j.GeoLocation; 11 | import twitter4j.IDs; 12 | import twitter4j.Paging; 13 | import twitter4j.ResponseList; 14 | import twitter4j.Status; 15 | import twitter4j.Twitter; 16 | import twitter4j.TwitterException; 17 | import twitter4j.TwitterFactory; 18 | import twitter4j.User; 19 | import twitter4j.http.AccessToken; 20 | 21 | import com.hp.hpl.jena.ontology.ObjectProperty; 22 | import com.hp.hpl.jena.ontology.OntClass; 23 | import com.hp.hpl.jena.ontology.OntModel; 24 | import com.hp.hpl.jena.ontology.OntModelSpec; 25 | import com.hp.hpl.jena.rdf.model.Literal; 26 | import com.hp.hpl.jena.rdf.model.Model; 27 | import com.hp.hpl.jena.rdf.model.ModelFactory; 28 | import com.hp.hpl.jena.rdf.model.Property; 29 | import com.hp.hpl.jena.rdf.model.Resource; 30 | import com.hp.hpl.jena.rdf.model.ResourceFactory; 31 | import com.hp.hpl.jena.sparql.vocabulary.FOAF; 32 | import com.hp.hpl.jena.vocabulary.XSD; 33 | 34 | public class TwitterRDFExtractor implements Constants { 35 | 36 | public TwitterRDFExtractor(File modelDir, String consumerKey, String consumerSecret, String token, String tokenSecret) throws Exception { 37 | super(); 38 | 39 | AccessToken accessToken = new AccessToken(token, tokenSecret); 40 | Twitter twitter = new TwitterFactory().getOAuthAuthorizedInstance(consumerKey, consumerSecret, accessToken); 41 | 42 | createModels(twitter, modelDir); 43 | } 44 | 45 | public Model extractModel(Twitter twitter, User user) throws TwitterException { 46 | OntModel model = createOntologyModel(); 47 | 48 | Resource userIdRes = createTweeterResource(model, user); 49 | 50 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_NICK, user.getScreenName()); 51 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_STATUS_COUNT, user.getStatusesCount()); 52 | 53 | String location = user.getLocation(); 54 | if (location != null && !"".equals(location)) { 55 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_LOCATION, location); 56 | } 57 | 58 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_FOLLOWER_COUNT, user.getFollowersCount()); 59 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_FRIENDS_COUNT, user.getFriendsCount()); 60 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_FAVOURITES_COUNT, user.getFavouritesCount()); 61 | 62 | Date createdDate = user.getCreatedAt(); 63 | Calendar cal = Calendar.getInstance(); 64 | cal.setTime(createdDate); 65 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_CREATED_AT_YEAR, cal.get(Calendar.YEAR)); 66 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_CREATED_AT_MONTH, cal.get(Calendar.MONTH)); 67 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_CREATED_AT_DAY_OF_WEEK, cal.get(Calendar.DAY_OF_WEEK)); 68 | 69 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_UTC_OFFSET, user.getUtcOffset()); 70 | addTweeterProperty(model, userIdRes, SEMANTIC_TWEETER_TIMEZONE, user.getTimeZone()); 71 | 72 | // add all the tweets 73 | int statusesCount = user.getStatusesCount(); 74 | if (statusesCount > 0) { 75 | ResponseList latestTweets = twitter.getUserTimeline(user.getId(), new Paging(1, statusesCount)); 76 | Iterator statusIt = latestTweets.iterator(); 77 | while (statusIt.hasNext()) { 78 | Status status = statusIt.next(); 79 | Resource statusRes = createTweetResource(model, status); 80 | 81 | Property hasTweetProp = ResourceFactory.createProperty(Constants.SEMANTIC_TWEETER_NS, "hasTweet"); 82 | model.add(userIdRes, hasTweetProp, statusRes); 83 | 84 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_IS_RETWEET, status.isRetweet()); 85 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_STATUS_LENGHT, status.getText().length()); 86 | 87 | URL[] urls = status.getURLs(); 88 | boolean hasUrls = (urls != null) && (urls.length > 0); 89 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_HAS_URLS, hasUrls); 90 | for (URL url : urls) { 91 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_URL, url); 92 | } 93 | 94 | String[] hashtags = status.getHashtags(); 95 | for (String hashTag : hashtags) { 96 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_HASH_TAG, hashTag); 97 | } 98 | 99 | GeoLocation geoLocation = status.getGeoLocation(); 100 | boolean hasGeoLocation = (geoLocation != null); 101 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_HAS_GEO_LOCATION, hasGeoLocation); 102 | if (hasGeoLocation) { 103 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_GEO_LOCATION, geoLocation); 104 | } 105 | 106 | User[] userMentions = status.getUserMentions(); 107 | boolean hasUserMentions = (userMentions != null) && (userMentions.length > 0); 108 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_HAS_USER_MENTIONS, hasUserMentions); 109 | for (User mentionedUser : userMentions) { 110 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_MENTIONED_USER, createTweeterResource(model, mentionedUser)); 111 | } 112 | 113 | Date createdAt = status.getCreatedAt(); 114 | Calendar tweetCal = Calendar.getInstance(); 115 | tweetCal.setTime(createdAt); 116 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_YEAR, tweetCal.get(Calendar.YEAR)); 117 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_MONTH, tweetCal.get(Calendar.MONTH)); 118 | addTweetProperty(model, statusRes, SEMANTIC_TWEET_DAY_OF_WEEK, tweetCal.get(Calendar.DAY_OF_WEEK)); 119 | } 120 | } 121 | 122 | return model; 123 | } 124 | 125 | public void createModels(Twitter twitter, File modelDir) throws Exception { 126 | // iterate over each friend and create a RDF model for each one 127 | IDs friendsIDs = twitter.getFriendsIDs(); 128 | int[] iDs = friendsIDs.getIDs(); 129 | for (int id : iDs) { 130 | 131 | boolean ok = false; 132 | do { 133 | try { 134 | 135 | User user = twitter.showUser(id); 136 | File userFile = new File(modelDir, user.getScreenName() + ".xml"); 137 | if (userFile.exists()) { 138 | System.out.println("skipping user " + user.getScreenName() + " because the model already exists"); 139 | ok = true; 140 | } else { 141 | System.out.println("creating model for " + user.getScreenName()); 142 | Model model = extractModel(twitter, user); 143 | System.out.println(" model created"); 144 | 145 | model.write(new FileOutputStream(userFile)); 146 | System.out.println(" model written to file " + userFile.getName()); 147 | ok = true; 148 | } 149 | } catch (Exception ex) { 150 | System.out.println(" exception --> wait for 1 second"); 151 | Thread.sleep(1000 * 1); 152 | } 153 | } while (!ok); 154 | 155 | } 156 | } 157 | 158 | private void add(OntModel model, Resource res, String ns, String localName, Object val, String typeURI) { 159 | if (val != null) { 160 | Property prop = model.getProperty(ns, localName); 161 | Literal lit = model.createTypedLiteral(val); 162 | if (val instanceof Resource) { 163 | model.add(res, prop, (Resource) val); 164 | } else { 165 | model.add(res, prop, lit); 166 | } 167 | } 168 | } 169 | 170 | private ObjectProperty addOntProperty(OntModel model, String ns, String localName, Resource domain, Resource range) { 171 | ObjectProperty objProperty = model.createObjectProperty(createUri(ns, localName)); 172 | objProperty.addDomain(domain); 173 | objProperty.addRange(range); 174 | return objProperty; 175 | } 176 | 177 | private void addTweeterProperty(OntModel model, Resource res, String localName, Object val) { 178 | String typeURI = createUri(SEMANTIC_TWEETER_NS, localName); 179 | add(model, res, SEMANTIC_TWEETER_NS, localName, val, typeURI); 180 | } 181 | 182 | private void addTweetProperty(OntModel model, Resource res, String localName, Object val) { 183 | String typeURI = createUri(SEMANTIC_TWEET_NS, localName); 184 | add(model, res, SEMANTIC_TWEET_NS, localName, val, typeURI); 185 | } 186 | 187 | private OntModel createOntologyModel() { 188 | OntModel model = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM_TRANS_INF); 189 | model.setNsPrefix("foaf", Constants.FOAF_NS); 190 | model.setNsPrefix("stweeter", Constants.SEMANTIC_TWEETER_NS); 191 | model.setNsPrefix("stweet", Constants.SEMANTIC_TWEET_NS); 192 | 193 | // Tweeter class 194 | OntClass tweeterClass = model.createClass(createUri(SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_CLASS)); 195 | tweeterClass.setSuperClass(FOAF.Person); 196 | tweeterClass.addLabel("a tweeter is a user on Twitter", "en"); 197 | { 198 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_NICK, tweeterClass, XSD.xstring); 199 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_STATUS_COUNT, tweeterClass, XSD.xlong); 200 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_LOCATION, tweeterClass, XSD.xstring); 201 | 202 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_FOLLOWER_COUNT, tweeterClass, XSD.xlong); 203 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_FRIENDS_COUNT, tweeterClass, XSD.xlong); 204 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_FAVOURITES_COUNT, tweeterClass, XSD.xlong); 205 | 206 | // time properties 207 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_UTC_OFFSET, tweeterClass, XSD.xlong); 208 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_TIMEZONE, tweeterClass, XSD.xstring); 209 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_CREATED_AT_YEAR, tweeterClass, XSD.xint); 210 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_CREATED_AT_MONTH, tweeterClass, XSD.xint); 211 | addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_CREATED_AT_DAY_OF_WEEK, tweeterClass, XSD.xint); 212 | } 213 | 214 | // Tweet properties 215 | OntClass tweetClass = model.createClass(createUri(SEMANTIC_TWEET_NS, SEMANTIC_TWEET_CLASS)); 216 | tweetClass.addLabel("a tweet is a status message from a tweeter on Twitter", "en"); 217 | 218 | // has tweet 219 | ObjectProperty hasTweetProperty = addOntProperty(model, SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_HAS_TWEET, tweeterClass, tweetClass); 220 | hasTweetProperty.addLabel("a tweeter can have many tweets", "en"); 221 | 222 | { 223 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_IS_RETWEET, tweetClass, XSD.xboolean); 224 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_STATUS_LENGHT, tweetClass, XSD.xint); 225 | 226 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_HAS_URLS, tweetClass, XSD.xboolean); 227 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_URL, tweetClass, XSD.xstring); 228 | 229 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_HASH_TAG, tweetClass, XSD.xstring); 230 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_GEO_LOCATION, tweetClass, XSD.xstring); 231 | 232 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_HAS_USER_MENTIONS, tweetClass, XSD.xboolean); 233 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_MENTIONED_USER, tweetClass, tweeterClass); 234 | 235 | // time properties 236 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_YEAR, tweetClass, XSD.xint); 237 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_MONTH, tweetClass, XSD.xint); 238 | addOntProperty(model, SEMANTIC_TWEET_NS, SEMANTIC_TWEET_DAY_OF_WEEK, tweetClass, XSD.xint); 239 | } 240 | 241 | return model; 242 | } 243 | 244 | private String createUri(String ns, String localName) { 245 | return ns + localName; 246 | } 247 | 248 | private Resource createTweeterResource(OntModel model, User user) { 249 | Resource clazz = model.getResource(createUri(SEMANTIC_TWEETER_NS, SEMANTIC_TWEETER_CLASS)); 250 | return model.createResource(Constants.SEMANTIC_TWEETER_NS + user.getScreenName(), clazz); 251 | } 252 | 253 | private Resource createTweetResource(OntModel model, Status status) { 254 | Resource clazz = model.getResource(createUri(SEMANTIC_TWEET_NS, SEMANTIC_TWEET_CLASS)); 255 | return model.createResource(Constants.SEMANTIC_TWEET_NS + status.getId(), clazz); 256 | } 257 | } 258 | --------------------------------------------------------------------------------