├── README.md ├── docs ├── docco.css └── wikipedia.html ├── index.html ├── js └── app.js └── wikipedia.js /README.md: -------------------------------------------------------------------------------- 1 | Extract information from Wikipedia via DBPedia using pure javascript. 2 | 3 | See the main site for more details! 4 | 5 | ## LICENSE 6 | 7 | Licensed under the [MIT License][1]. 8 | 9 | [1]: http://opensource.org/licenses/mit-license.php 10 | 11 | -------------------------------------------------------------------------------- /docs/docco.css: -------------------------------------------------------------------------------- 1 | /*--------------------- Layout and Typography ----------------------------*/ 2 | body { 3 | font-family: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; 4 | font-size: 15px; 5 | line-height: 22px; 6 | color: #252519; 7 | margin: 0; padding: 0; 8 | } 9 | a { 10 | color: #261a3b; 11 | } 12 | a:visited { 13 | color: #261a3b; 14 | } 15 | p { 16 | margin: 0 0 15px 0; 17 | } 18 | h1, h2, h3, h4, h5, h6 { 19 | margin: 0px 0 15px 0; 20 | } 21 | h1 { 22 | margin-top: 40px; 23 | } 24 | #container { 25 | position: relative; 26 | } 27 | #background { 28 | position: fixed; 29 | top: 0; left: 525px; right: 0; bottom: 0; 30 | background: #f5f5ff; 31 | border-left: 1px solid #e5e5ee; 32 | z-index: -1; 33 | } 34 | #jump_to, #jump_page { 35 | background: white; 36 | -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; 37 | -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; 38 | font: 10px Arial; 39 | text-transform: uppercase; 40 | cursor: pointer; 41 | text-align: right; 42 | } 43 | #jump_to, #jump_wrapper { 44 | position: fixed; 45 | right: 0; top: 0; 46 | padding: 5px 10px; 47 | } 48 | #jump_wrapper { 49 | padding: 0; 50 | display: none; 51 | } 52 | #jump_to:hover #jump_wrapper { 53 | display: block; 54 | } 55 | #jump_page { 56 | padding: 5px 0 3px; 57 | margin: 0 0 25px 25px; 58 | } 59 | #jump_page 
.source { 60 | display: block; 61 | padding: 5px 10px; 62 | text-decoration: none; 63 | border-top: 1px solid #eee; 64 | } 65 | #jump_page .source:hover { 66 | background: #f5f5ff; 67 | } 68 | #jump_page .source:first-child { 69 | } 70 | table td { 71 | border: 0; 72 | outline: 0; 73 | } 74 | td.docs, th.docs { 75 | max-width: 450px; 76 | min-width: 450px; 77 | min-height: 5px; 78 | padding: 10px 25px 1px 50px; 79 | overflow-x: hidden; 80 | vertical-align: top; 81 | text-align: left; 82 | } 83 | .docs pre { 84 | margin: 15px 0 15px; 85 | padding-left: 15px; 86 | } 87 | .docs p tt, .docs p code { 88 | background: #f8f8ff; 89 | border: 1px solid #dedede; 90 | font-size: 12px; 91 | padding: 0 0.2em; 92 | } 93 | .pilwrap { 94 | position: relative; 95 | } 96 | .pilcrow { 97 | font: 12px Arial; 98 | text-decoration: none; 99 | color: #454545; 100 | position: absolute; 101 | top: 3px; left: -20px; 102 | padding: 1px 2px; 103 | opacity: 0; 104 | -webkit-transition: opacity 0.2s linear; 105 | } 106 | td.docs:hover .pilcrow { 107 | opacity: 1; 108 | } 109 | td.code, th.code { 110 | padding: 14px 15px 16px 25px; 111 | width: 100%; 112 | vertical-align: top; 113 | background: #f5f5ff; 114 | border-left: 1px solid #e5e5ee; 115 | } 116 | pre, tt, code { 117 | font-size: 12px; line-height: 18px; 118 | font-family: Monaco, Consolas, "Lucida Console", monospace; 119 | margin: 0; padding: 0; 120 | } 121 | 122 | 123 | /*---------------------- Syntax Highlighting -----------------------------*/ 124 | td.linenos { background-color: #f0f0f0; padding-right: 10px; } 125 | span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } 126 | body .hll { background-color: #ffffcc } 127 | body .c { color: #408080; font-style: italic } /* Comment */ 128 | body .err { border: 1px solid #FF0000 } /* Error */ 129 | body .k { color: #954121 } /* Keyword */ 130 | body .o { color: #666666 } /* Operator */ 131 | body .cm { color: #408080; font-style: italic } /* Comment.Multiline */ 132 | body .cp 
{ color: #BC7A00 } /* Comment.Preproc */ 133 | body .c1 { color: #408080; font-style: italic } /* Comment.Single */ 134 | body .cs { color: #408080; font-style: italic } /* Comment.Special */ 135 | body .gd { color: #A00000 } /* Generic.Deleted */ 136 | body .ge { font-style: italic } /* Generic.Emph */ 137 | body .gr { color: #FF0000 } /* Generic.Error */ 138 | body .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 139 | body .gi { color: #00A000 } /* Generic.Inserted */ 140 | body .go { color: #808080 } /* Generic.Output */ 141 | body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ 142 | body .gs { font-weight: bold } /* Generic.Strong */ 143 | body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 144 | body .gt { color: #0040D0 } /* Generic.Traceback */ 145 | body .kc { color: #954121 } /* Keyword.Constant */ 146 | body .kd { color: #954121; font-weight: bold } /* Keyword.Declaration */ 147 | body .kn { color: #954121; font-weight: bold } /* Keyword.Namespace */ 148 | body .kp { color: #954121 } /* Keyword.Pseudo */ 149 | body .kr { color: #954121; font-weight: bold } /* Keyword.Reserved */ 150 | body .kt { color: #B00040 } /* Keyword.Type */ 151 | body .m { color: #666666 } /* Literal.Number */ 152 | body .s { color: #219161 } /* Literal.String */ 153 | body .na { color: #7D9029 } /* Name.Attribute */ 154 | body .nb { color: #954121 } /* Name.Builtin */ 155 | body .nc { color: #0000FF; font-weight: bold } /* Name.Class */ 156 | body .no { color: #880000 } /* Name.Constant */ 157 | body .nd { color: #AA22FF } /* Name.Decorator */ 158 | body .ni { color: #999999; font-weight: bold } /* Name.Entity */ 159 | body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ 160 | body .nf { color: #0000FF } /* Name.Function */ 161 | body .nl { color: #A0A000 } /* Name.Label */ 162 | body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ 163 | body .nt { color: #954121; font-weight: bold } /* Name.Tag */ 
164 | body .nv { color: #19469D } /* Name.Variable */ 165 | body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ 166 | body .w { color: #bbbbbb } /* Text.Whitespace */ 167 | body .mf { color: #666666 } /* Literal.Number.Float */ 168 | body .mh { color: #666666 } /* Literal.Number.Hex */ 169 | body .mi { color: #666666 } /* Literal.Number.Integer */ 170 | body .mo { color: #666666 } /* Literal.Number.Oct */ 171 | body .sb { color: #219161 } /* Literal.String.Backtick */ 172 | body .sc { color: #219161 } /* Literal.String.Char */ 173 | body .sd { color: #219161; font-style: italic } /* Literal.String.Doc */ 174 | body .s2 { color: #219161 } /* Literal.String.Double */ 175 | body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ 176 | body .sh { color: #219161 } /* Literal.String.Heredoc */ 177 | body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ 178 | body .sx { color: #954121 } /* Literal.String.Other */ 179 | body .sr { color: #BB6688 } /* Literal.String.Regex */ 180 | body .s1 { color: #219161 } /* Literal.String.Single */ 181 | body .ss { color: #19469D } /* Literal.String.Symbol */ 182 | body .bp { color: #954121 } /* Name.Builtin.Pseudo */ 183 | body .vc { color: #19469D } /* Name.Variable.Class */ 184 | body .vg { color: #19469D } /* Name.Variable.Global */ 185 | body .vi { color: #19469D } /* Name.Variable.Instance */ 186 | body .il { color: #666666 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/wikipedia.html: -------------------------------------------------------------------------------- 1 |
wikipedia.js | |
---|---|
var WIKIPEDIA = function() {
  var my = {};

  // DBPedia SPARQL endpoint
  my.endpoint = 'http://dbpedia.org/sparql/';

  // ## getData
  //
  // Return structured information (via callback) on the provided Wikipedia
  // URL by querying the DBPedia SPARQL endpoint and then tidying the data up.
  //
  // Data is returned in the form of the following hash:
  //
  //    {
  //      raw: the-raw-json-from-dbpedia,
  //      summary: a-cleaned-up-set-of-the-properties (see extractSummary),
  //      dbpediaUrl: dbpedia-resource-url e.g. http://dbpedia.org/resource/WorldWarII
  //    }
  //
  // Function is asynchronous as we have to call out to DBPedia to get the
  // info: nothing is returned, the hash is handed to `callback`. `error` is
  // passed through unchanged to getRawJSON as the request's error handler.
  my.getData = function(wikipediaUrl, callback, error) {
    var url = my._getDbpediaUrl(wikipediaUrl);
    function onSuccess(data) {
      callback({
        raw: data,
        dbpediaUrl: url,
        summary: my.extractSummary(url, data)
      });
    }
    my.getRawJSON(url, onSuccess, error);
  };

  // ## _getDbpediaUrl
  //
  // Convert a Wikipedia url to a DBPedia url by re-using the last path
  // segment as the resource title. Urls that do not mention 'wikipedia'
  // are returned unchanged.
  my._getDbpediaUrl = function(url) {
    // indexOf returns -1 (truthy) when absent and 0 (falsy) for a match at
    // the start of the string, so it must be compared against -1 explicitly.
    if (url.indexOf('wikipedia') !== -1) {
      var parts = url.split('/');
      var title = parts[parts.length - 1];
      url = 'http://dbpedia.org/resource/' + title;
    }
    return url;
  };

  // ## getRawJSON
  //
  // Get raw RDF JSON for a DBPedia resource from the DBPedia SPARQL endpoint.
  // `callback` and `error` are handed to $.ajax as its success and error
  // handlers; the value returned by $.ajax is returned to the caller.
  my.getRawJSON = function(url, callback, error) {
    // Plain concatenation: String.replace would interpret `$` sequences in
    // the url as replacement patterns.
    var sparqlQuery = 'DESCRIBE <' + url + '>';
    return $.ajax({
      url: my.endpoint,
      data: {
        query: sparqlQuery,
        // format: 'application/x-json+ld'
        format: 'application/rdf+json'
      },
      success: callback,
      error: error
    });
  };

  // Standard RDF namespace prefixes for use in lookupProperty function
  my.PREFIX = {
    rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs: "http://www.w3.org/2000/01/rdf-schema#",
    xsd: "http://www.w3.org/2001/XMLSchema#",
    owl: "http://www.w3.org/2002/07/owl#",
    dc: "http://purl.org/dc/terms/",
    foaf: "http://xmlns.com/foaf/0.1/",
    vcard: "http://www.w3.org/2006/vcard/ns#",
    dbp: "http://dbpedia.org/property/",
    dbo: "http://dbpedia.org/ontology/",
    geo: "http://www.geonames.org/ontology#",
    wgs: "http://www.w3.org/2003/01/geo/wgs84_pos#"
  };

  // ## _lookupProperty
  //
  // Look up a property value given a standard RDF/JSON property dictionary
  // (property uri => list of value objects). `property` may be a full uri or
  // use one of the prefixes in my.PREFIX, e.g. 'rdfs:label'.
  //
  // Returns the `value` of the first entry that has no `lang` or whose
  // `lang` is 'en'; returns undefined when nothing matches.
  my._lookupProperty = function(dict, property) {
    var key, i, values;
    if (!dict) {
      return undefined;
    }
    // first expand namespace
    for (key in my.PREFIX) {
      if (Object.prototype.hasOwnProperty.call(my.PREFIX, key) &&
          property.indexOf(key + ':') === 0) {
        property = my.PREFIX[key] + property.slice(key.length + 1);
        break;
      }
    }
    values = dict[property];
    if (!values) {
      return undefined;
    }
    // NOTE(review): assumes `values` is an array, as in RDF/JSON - confirm
    // no caller passes an object keyed by something other than indices.
    for (i = 0; i < values.length; i++) {
      // only take english values if lang is present
      if (!values[i].lang || values[i].lang === 'en') {
        return values[i].value;
      }
    }
    return undefined;
  };

  // ## extractSummary
  //
  // Extract a standard set of attributes (e.g. title, description, dates etc
  // etc) from rdfJson for the given subject uri (url). A subject that is
  // missing from rdfJson yields a summary whose lookups are all undefined
  // (and whose `images` is empty) rather than an exception.
  my.extractSummary = function(subjectUri, rdfJson) {
    var properties = (rdfJson && rdfJson[subjectUri]) || {};

    // Look up one property, or - given an array of properties - collect the
    // truthy values found for each of them, in order.
    function lkup(attribs) {
      var out, i, found;
      if (!(attribs instanceof Array)) {
        return my._lookupProperty(properties, attribs);
      }
      out = [];
      for (i = 0; i < attribs.length; i++) {
        found = my._lookupProperty(properties, attribs[i]);
        if (found) {
          out.push(found);
        }
      }
      return out;
    }

    var summaryInfo = {
      title: lkup('rdfs:label'),
      description: lkup('dbo:abstract'),
      summary: lkup('rdfs:comment'),
      birthDate: lkup('dbp:birthDate'),
      deathDate: lkup('dbp:deathDate'),
      // both dbp:date and dbo:date are usually present but dbp:date is
      // frequently "bad" (e.g. just a single integer rather than a date)
      // whereas ontology value is better
      date: lkup('dbo:date'),
      place: lkup('dbp:place'),
      // 'dbp' (not 'dpb'): the prefix has to exist in my.PREFIX to expand
      birthPlace: lkup('dbp:birthPlace'),
      deathPlace: lkup('dbp:deathPlace'),
      source: lkup('foaf:page'),
      images: lkup(['dbo:thumbnail', 'foaf:depiction', 'foaf:img']),
      location: {
        lat: lkup('wgs:lat'),
        lon: lkup('wgs:lon')
      }
    };

    summaryInfo.start = summaryInfo.birthDate || summaryInfo.date;
    summaryInfo.end = summaryInfo.deathDate;
    summaryInfo.location.title = summaryInfo.place || summaryInfo.birthPlace ||
      summaryInfo.deathPlace;
    // `images` is always an array, so test its length rather than its
    // truthiness to get null when no image was found.
    summaryInfo.image = summaryInfo.images.length ? summaryInfo.images[0] : null;

    return summaryInfo;
  };

  return my;
}();
133 |
134 | |
You can grab the (incredibly lightweight) wikipedia.js library from here. 68 |
Want to browse the annotated source?
69 |The library is the work of Open Knowledge Foundation Labs and Rufus Pollock in particular. It is, in essence, a small wrapper around the data and APIs of the DBPedia project and it is they who have done all the heavy lifting of extracting structured data from Wikipedia - huge credit and thanks to DBPedia folks!
70 |// first include the script
74 | <script src='http://okfnlabs.org/wikipediajs/wikipedia.js'></script>
75 | WIKIPEDIA.getData('http://en.wikipedia.org/wiki/Invasion_of_Normandy', function(info) {
76 |   alert(info.summary.title);
77 |   console.log(info); });
78 | Or, check out the more detailed demo below ...
79 |This simple demo uses wikipedia.js library to load information on the given page and display it below. The JS source for the demo is here while the HTML source is just this page!
90 | 91 | 92 | 95 | 96 | 134 |