├── .gitignore
├── README.md
├── examples
└── backtothefuture
│ ├── README.md
│ ├── build.xml
│ └── src
│ └── BackToTheFuture.java
├── pom.xml
└── src
├── main
└── java
│ └── org
│ └── opengraph
│ ├── MetaElement.java
│ ├── OpenGraph.java
│ └── OpenGraphNamespace.java
└── test
└── main
└── java
└── org
└── opengraph
└── OpenGraphTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | #Ignore
2 | bin/*
3 | testreport/*
4 | examples/backtothefuture/build/*
5 | target/*
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | OpenGraph for Java is a small class used to represent the Open Graph protocol (available from http://opengraphprotocol.org/)
2 |
3 | This project is entirely open source due to the size of the code, so please go nuts and do whatever you want with the code.
4 |
5 | Due to the lack of useful native DOM parsers, this class uses the HtmlCleaner library (available at http://htmlcleaner.sourceforge.net/index.php); its license is reproduced at the bottom of this readme. We hope to remove the reliance on third-party libraries at a later stage.
6 | ## Usage ##
7 | In this example we fetch the og:title and og:type contents, ignoring any errors if the page does not comply with the Open Graph protocol standard (this is enabled by passing true as the second constructor argument).
8 |
9 | > OpenGraph testPage = new OpenGraph("http://uk.rottentomatoes.com/m/1217700-kick_ass", true);
10 |
11 | > String title = testPage.getContent("title");
12 |
13 | > String type = testPage.getContent("type");
14 |
15 | Another example (available in the examples/ folder) demonstrates the support for custom OpenGraph namespaces
16 |
17 | OpenGraph movie = new OpenGraph("http://www.rottentomatoes.com/m/back_to_the_future/", true);
18 | System.out.println("Movie: " + movie.getContent("title"));
19 | for (MetaElement director : movie.getProperties("director"))
20 | {
21 | OpenGraph extendedInfo = director.getExtendedData();
22 | System.out.println("Directed by: " + extendedInfo.getContent("title"));
23 | }
24 |
25 | for (MetaElement member : movie.getProperties("cast"))
26 | {
27 | OpenGraph extendedInfo = member.getExtendedData();
28 | System.out.println("Starring: " + extendedInfo.getContent("title"));
29 | }
30 |
31 | ## Features ##
32 | * Uses the correct charset of the page during parsing (thanks to [rkhmelyuk](https://github.com/rkhmelyuk))
33 | * Supports the ability for social applications to declare their own namespaces and OpenGraph meta
34 | * Hashtable like representation of an Open Graph page
35 | * Output to HTML (render the meta data back out as tags)
36 | * Create OG data from scratch (the ability to use this class as a reverse and generate OG meta tags)
37 |
38 | ## License ##
39 | HTMLCleaner license (taken from http://htmlcleaner.sourceforge.net/license.php)
40 |
41 | Copyright (c) 2006-2007, HtmlCleaner team.
42 | All rights reserved.
43 |
44 | Redistribution and use of this software in source and binary forms,
45 | with or without modification, are permitted provided that the
46 | following conditions are met:
47 |
48 | * Redistributions of source code must retain the above
49 | copyright notice, this list of conditions and the
50 | following disclaimer.
51 |
52 | * Redistributions in binary form must reproduce the above
53 | copyright notice, this list of conditions and the
54 | following disclaimer in the documentation and/or other
55 | materials provided with the distribution.
56 |
57 | * The name of HtmlCleaner may not be used to endorse or promote
58 | products derived from this software without specific prior
59 | written permission.
60 |
61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
62 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
63 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
64 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
65 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
66 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
67 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
68 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
69 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
70 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
71 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
72 |
--------------------------------------------------------------------------------
/examples/backtothefuture/README.md:
--------------------------------------------------------------------------------
1 | This example showcases the extended OpenGraph namespaces that some web applications utilise. In this case, Flixster exposes extended graph data about the directors and cast members of a particular movie.
2 |
3 | When querying the movie "Back to the Future", we receive the following data by expanding the graph as published in Flixster's extended namespace.
4 |
5 | $ java -jar BackToTheFuture.jar
6 | Movie: Back to the Future
7 | Directed by: Robert Zemeckis
8 | Starring: Michael J. Fox
9 | Starring: Christopher Lloyd
10 | Starring: Crispin Glover
11 | Starring: Lea Thompson
12 | Starring: Thomas F. Wilson
13 | Starring: James Tolkan
14 | Starring: Claudia Wells
15 | Starring: Wendie Jo Sperber
16 | Starring: Marc McClure
17 | Starring: George DiCenzo
18 | Starring: Norman Alden
19 | Starring: Ivy Bethune
20 | Starring: Maia Brewton
--------------------------------------------------------------------------------
/examples/backtothefuture/build.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/examples/backtothefuture/src/BackToTheFuture.java:
--------------------------------------------------------------------------------
1 | package org.opengraph.examples;
2 |
3 | import org.opengraph.OpenGraph;
4 | import org.opengraph.MetaElement;
5 |
6 | public class BackToTheFuture {
7 | static String uri = "http://www.rottentomatoes.com/m/back_to_the_future/";
8 |
9 | public static void main(String [] args)
10 | {
11 | try
12 | {
13 | OpenGraph movie = new OpenGraph(uri, true);
14 | System.out.println("Movie: " + movie.getContent("title"));
15 | for (MetaElement director : movie.getProperties("director"))
16 | {
17 | OpenGraph extendedInfo = director.getExtendedData();
18 | System.out.println("Directed by: " + extendedInfo.getContent("title"));
19 | }
20 |
21 | for (MetaElement member : movie.getProperties("cast"))
22 | {
23 | OpenGraph extendedInfo = member.getExtendedData();
24 | System.out.println("Starring: " + extendedInfo.getContent("title"));
25 | }
26 |
27 | }
28 | catch (Exception e)
29 | {
30 | e.printStackTrace();
31 | }
32 | }
33 | }
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | OpenGraph
5 | OpenGraph
6 | 0.0.1-SNAPSHOT
7 | A Facebook OpenGraph implementation for Java
8 | OpenGraph for Java
9 |
10 |
11 |
12 | org.hamcrest
13 | hamcrest-core
14 | 1.3
15 |
16 |
17 | net.sourceforge.htmlcleaner
18 | htmlcleaner
19 | 2.16
20 |
21 |
22 | junit
23 | junit
24 | 4.12
25 |
26 |
27 |
28 |
29 | src
30 |
31 |
32 | maven-compiler-plugin
33 | 3.3
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/src/main/java/org/opengraph/MetaElement.java:
--------------------------------------------------------------------------------
1 | package org.opengraph;
2 |
3 | import java.net.URL;
4 |
5 | /**
6 | * Represents OpenGraph enabled meta data for a specific document
7 | * @author Callum Jones
8 | */
9 | public class MetaElement
10 | {
11 | private OpenGraphNamespace namespace; //either "og" an NS specific
12 | private String property;
13 | private String content;
14 |
15 | /**
16 | * Construct the representation of an element
17 | * @param namespace The namespace the element belongs to
18 | * @param property The property key
19 | * @param content The content or value of this element
20 | */
21 | public MetaElement(OpenGraphNamespace namespace, String property, String content)
22 | {
23 | this.namespace = namespace;
24 | this.property = property;
25 | this.content = content;
26 | }
27 |
28 | /**
29 | * Fetch the content string of the element
30 | */
31 | public String getContent()
32 | {
33 | return content;
34 | }
35 |
36 | /**
37 | * Fetch the OpenGraph namespace
38 | */
39 | public OpenGraphNamespace getNamespace()
40 | {
41 | return namespace;
42 | }
43 |
44 | /**
45 | * Fetch the property of the element
46 | */
47 | public String getProperty()
48 | {
49 | return property;
50 | }
51 |
52 | /**
53 | * Fetch the OpenGraph data from the object
54 | * @return If the content is a URL, then an attempted will be made to build OpenGraph data from the object
55 | */
56 | public OpenGraph getExtendedData()
57 | {
58 | //The Java language should know the best form of a URL
59 | try
60 | {
61 | URL url = new URL(getContent());
62 |
63 | //success
64 | return new OpenGraph(url.toString(), true);
65 | }
66 | catch (Exception e)
67 | {
68 | return null; //not a valid URL
69 | }
70 | }
71 | }
--------------------------------------------------------------------------------
/src/main/java/org/opengraph/OpenGraph.java:
--------------------------------------------------------------------------------
1 | package org.opengraph;
2 |
3 | import org.htmlcleaner.HtmlCleaner;
4 | import org.htmlcleaner.TagNode;
5 |
6 | import java.io.BufferedReader;
7 | import java.io.InputStreamReader;
8 | import java.net.URL;
9 | import java.net.URLConnection;
10 | import java.nio.charset.Charset;
11 | import java.util.ArrayList;
12 | import java.util.Hashtable;
13 | import java.util.regex.Matcher;
14 | import java.util.regex.Pattern;
15 |
16 | /**
17 | * A Java object representation of an Open Graph enabled webpage.
18 | * A simplified layer over a Hashtable.
19 | *
20 | * @author Callum Jones
21 | */
22 | public class OpenGraph
23 | {
24 | private String pageUrl;
25 | private ArrayList pageNamespaces;
26 | private Hashtable> metaAttributes;
27 | private String baseType;
28 | private boolean isImported; // determine if the object is a new incarnation or representation of a web page
29 | private boolean hasChanged; // track if object has been changed
30 |
31 | public final static String[] REQUIRED_META = new String[]{"title", "type", "image", "url" };
32 |
33 | public final static Hashtable BASE_TYPES = new Hashtable();
34 | static
35 | {
36 | BASE_TYPES.put("activity", new String[] {"activity", "sport"});
37 | BASE_TYPES.put("business", new String[] {"bar", "company", "cafe", "hotel", "restaurant"});
38 | BASE_TYPES.put("group", new String[] {"cause", "sports_league", "sports_team"});
39 | BASE_TYPES.put("organization", new String[] {"band", "government", "non_profit", "school", "university"});
40 | BASE_TYPES.put("person", new String[] {"actor", "athlete", "author", "director", "musician", "politician", "profile", "public_figure"});
41 | BASE_TYPES.put("place", new String[] {"city", "country", "landmark", "state_province"});
42 | BASE_TYPES.put("product", new String[] {"album", "book", "drink", "food", "game", "movie", "product", "song", "tv_show"});
43 | BASE_TYPES.put("website", new String[] {"blog", "website", "article"});
44 | }
45 |
46 | /**
47 | * Create an open graph representation for generating your own Open Graph object
48 | */
49 | public OpenGraph()
50 | {
51 | pageNamespaces = new ArrayList();
52 | metaAttributes = new Hashtable>();
53 | hasChanged = false;
54 | isImported = false;
55 | }
56 |
57 | /**
58 | * Fetch the open graph representation from a web site
59 | * @param url The address to the web page to fetch Open Graph data
60 | * @param ignoreSpecErrors Set this option to true if you don't wish to have an exception throw if the page does not conform to the basic 4 attributes
61 | * @throws java.io.IOException If a network error occurs, the HTML parser will throw an IO Exception
62 | * @throws java.lang.Exception A generic exception is throw if the specific page fails to conform to the basic Open Graph standard as define by the constant REQUIRED_META
63 | */
64 | public OpenGraph(String url, boolean ignoreSpecErrors) throws java.io.IOException, Exception {
65 | this();
66 | isImported = true;
67 |
68 |
69 | // download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content
70 | URL pageURL = new URL(url);
71 | URLConnection siteConnection = pageURL.openConnection();
72 | Charset charset = getConnectionCharset(siteConnection);
73 | BufferedReader dis = new BufferedReader(new InputStreamReader(siteConnection.getInputStream(), charset));
74 | String inputLine;
75 | StringBuffer headContents = new StringBuffer();
76 |
77 | // Loop through each line, looking for the closing head element
78 | while ((inputLine = dis.readLine()) != null)
79 | {
80 | if (inputLine.contains(""))
81 | {
82 | inputLine = inputLine.substring(0, inputLine.indexOf("") + 7);
83 | inputLine = inputLine.concat("