├── README.md ├── docs ├── docco.css └── wikipedia.html ├── index.html ├── js └── app.js └── wikipedia.js /README.md: -------------------------------------------------------------------------------- 1 | Extract information from Wikipedia via DBPedia using pure javascript. 2 | 3 | See the main site for more details! 4 | 5 | ## LICENSE 6 | 7 | Licensed under the [MIT License][1]. 8 | 9 | [1]: http://opensource.org/licenses/mit-license.php 10 | 11 | -------------------------------------------------------------------------------- /docs/docco.css: -------------------------------------------------------------------------------- 1 | /*--------------------- Layout and Typography ----------------------------*/ 2 | body { 3 | font-family: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; 4 | font-size: 15px; 5 | line-height: 22px; 6 | color: #252519; 7 | margin: 0; padding: 0; 8 | } 9 | a { 10 | color: #261a3b; 11 | } 12 | a:visited { 13 | color: #261a3b; 14 | } 15 | p { 16 | margin: 0 0 15px 0; 17 | } 18 | h1, h2, h3, h4, h5, h6 { 19 | margin: 0px 0 15px 0; 20 | } 21 | h1 { 22 | margin-top: 40px; 23 | } 24 | #container { 25 | position: relative; 26 | } 27 | #background { 28 | position: fixed; 29 | top: 0; left: 525px; right: 0; bottom: 0; 30 | background: #f5f5ff; 31 | border-left: 1px solid #e5e5ee; 32 | z-index: -1; 33 | } 34 | #jump_to, #jump_page { 35 | background: white; 36 | -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; 37 | -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; 38 | font: 10px Arial; 39 | text-transform: uppercase; 40 | cursor: pointer; 41 | text-align: right; 42 | } 43 | #jump_to, #jump_wrapper { 44 | position: fixed; 45 | right: 0; top: 0; 46 | padding: 5px 10px; 47 | } 48 | #jump_wrapper { 49 | padding: 0; 50 | display: none; 51 | } 52 | #jump_to:hover #jump_wrapper { 53 | display: block; 54 | } 55 | #jump_page { 56 | padding: 5px 0 3px; 57 | margin: 0 0 25px 25px; 58 | } 59 | #jump_page 
.source { 60 | display: block; 61 | padding: 5px 10px; 62 | text-decoration: none; 63 | border-top: 1px solid #eee; 64 | } 65 | #jump_page .source:hover { 66 | background: #f5f5ff; 67 | } 68 | #jump_page .source:first-child { 69 | } 70 | table td { 71 | border: 0; 72 | outline: 0; 73 | } 74 | td.docs, th.docs { 75 | max-width: 450px; 76 | min-width: 450px; 77 | min-height: 5px; 78 | padding: 10px 25px 1px 50px; 79 | overflow-x: hidden; 80 | vertical-align: top; 81 | text-align: left; 82 | } 83 | .docs pre { 84 | margin: 15px 0 15px; 85 | padding-left: 15px; 86 | } 87 | .docs p tt, .docs p code { 88 | background: #f8f8ff; 89 | border: 1px solid #dedede; 90 | font-size: 12px; 91 | padding: 0 0.2em; 92 | } 93 | .pilwrap { 94 | position: relative; 95 | } 96 | .pilcrow { 97 | font: 12px Arial; 98 | text-decoration: none; 99 | color: #454545; 100 | position: absolute; 101 | top: 3px; left: -20px; 102 | padding: 1px 2px; 103 | opacity: 0; 104 | -webkit-transition: opacity 0.2s linear; 105 | } 106 | td.docs:hover .pilcrow { 107 | opacity: 1; 108 | } 109 | td.code, th.code { 110 | padding: 14px 15px 16px 25px; 111 | width: 100%; 112 | vertical-align: top; 113 | background: #f5f5ff; 114 | border-left: 1px solid #e5e5ee; 115 | } 116 | pre, tt, code { 117 | font-size: 12px; line-height: 18px; 118 | font-family: Monaco, Consolas, "Lucida Console", monospace; 119 | margin: 0; padding: 0; 120 | } 121 | 122 | 123 | /*---------------------- Syntax Highlighting -----------------------------*/ 124 | td.linenos { background-color: #f0f0f0; padding-right: 10px; } 125 | span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } 126 | body .hll { background-color: #ffffcc } 127 | body .c { color: #408080; font-style: italic } /* Comment */ 128 | body .err { border: 1px solid #FF0000 } /* Error */ 129 | body .k { color: #954121 } /* Keyword */ 130 | body .o { color: #666666 } /* Operator */ 131 | body .cm { color: #408080; font-style: italic } /* Comment.Multiline */ 132 | body .cp 
{ color: #BC7A00 } /* Comment.Preproc */ 133 | body .c1 { color: #408080; font-style: italic } /* Comment.Single */ 134 | body .cs { color: #408080; font-style: italic } /* Comment.Special */ 135 | body .gd { color: #A00000 } /* Generic.Deleted */ 136 | body .ge { font-style: italic } /* Generic.Emph */ 137 | body .gr { color: #FF0000 } /* Generic.Error */ 138 | body .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 139 | body .gi { color: #00A000 } /* Generic.Inserted */ 140 | body .go { color: #808080 } /* Generic.Output */ 141 | body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ 142 | body .gs { font-weight: bold } /* Generic.Strong */ 143 | body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 144 | body .gt { color: #0040D0 } /* Generic.Traceback */ 145 | body .kc { color: #954121 } /* Keyword.Constant */ 146 | body .kd { color: #954121; font-weight: bold } /* Keyword.Declaration */ 147 | body .kn { color: #954121; font-weight: bold } /* Keyword.Namespace */ 148 | body .kp { color: #954121 } /* Keyword.Pseudo */ 149 | body .kr { color: #954121; font-weight: bold } /* Keyword.Reserved */ 150 | body .kt { color: #B00040 } /* Keyword.Type */ 151 | body .m { color: #666666 } /* Literal.Number */ 152 | body .s { color: #219161 } /* Literal.String */ 153 | body .na { color: #7D9029 } /* Name.Attribute */ 154 | body .nb { color: #954121 } /* Name.Builtin */ 155 | body .nc { color: #0000FF; font-weight: bold } /* Name.Class */ 156 | body .no { color: #880000 } /* Name.Constant */ 157 | body .nd { color: #AA22FF } /* Name.Decorator */ 158 | body .ni { color: #999999; font-weight: bold } /* Name.Entity */ 159 | body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ 160 | body .nf { color: #0000FF } /* Name.Function */ 161 | body .nl { color: #A0A000 } /* Name.Label */ 162 | body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ 163 | body .nt { color: #954121; font-weight: bold } /* Name.Tag */ 
164 | body .nv { color: #19469D } /* Name.Variable */ 165 | body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ 166 | body .w { color: #bbbbbb } /* Text.Whitespace */ 167 | body .mf { color: #666666 } /* Literal.Number.Float */ 168 | body .mh { color: #666666 } /* Literal.Number.Hex */ 169 | body .mi { color: #666666 } /* Literal.Number.Integer */ 170 | body .mo { color: #666666 } /* Literal.Number.Oct */ 171 | body .sb { color: #219161 } /* Literal.String.Backtick */ 172 | body .sc { color: #219161 } /* Literal.String.Char */ 173 | body .sd { color: #219161; font-style: italic } /* Literal.String.Doc */ 174 | body .s2 { color: #219161 } /* Literal.String.Double */ 175 | body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ 176 | body .sh { color: #219161 } /* Literal.String.Heredoc */ 177 | body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ 178 | body .sx { color: #954121 } /* Literal.String.Other */ 179 | body .sr { color: #BB6688 } /* Literal.String.Regex */ 180 | body .s1 { color: #219161 } /* Literal.String.Single */ 181 | body .ss { color: #19469D } /* Literal.String.Symbol */ 182 | body .bp { color: #954121 } /* Name.Builtin.Pseudo */ 183 | body .vc { color: #19469D } /* Name.Variable.Class */ 184 | body .vg { color: #19469D } /* Name.Variable.Global */ 185 | body .vi { color: #19469D } /* Name.Variable.Instance */ 186 | body .il { color: #666666 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/wikipedia.html: -------------------------------------------------------------------------------- 1 | wikipedia.js

wikipedia.js

// Small wrapper around the DBPedia SPARQL endpoint: turns a Wikipedia URL into
// a DBPedia resource URL, fetches its RDF/JSON description and extracts a
// summary of commonly used properties. Requires jQuery (`$`) for the request.
var WIKIPEDIA = function() {
  var my = {};

  // DBPedia SPARQL endpoint
  my.endpoint = 'http://dbpedia.org/sparql/';

  // ### getData
  //
  // Return structured information (via callback) on the provided Wikipedia
  // URL by querying the DBPedia SPARQL endpoint and then tidying the data up.
  //
  // Data is returned in the form of the following hash:
  //
  //     {
  //       raw: the-raw-json-from-dbpedia,
  //       summary: a-cleaned-up-set-of-the-properties (see extractSummary),
  //       dbpediaUrl: dbpedia-resource-url e.g. http://dbpedia.org/resource/World_War_II
  //     }
  //
  // Function is asynchronous as we have to call out to DBPedia to get the
  // info: the hash is passed to `callback`, while `error` is handed to
  // $.ajax as its error handler.
  my.getData = function(wikipediaUrl, callback, error) {
    var url = my._getDbpediaUrl(wikipediaUrl);
    function onSuccess(data) {
      callback({
        raw: data,
        dbpediaUrl: url,
        // an empty response must not throw inside the success handler
        summary: data ? my.extractSummary(url, data) : null
      });
    }
    my.getRawJSON(url, onSuccess, error);
  };

  // ### _getDbpediaUrl
  //
  // Convert a Wikipedia url to a DBPedia url: the last path segment of the
  // input is used as the DBPedia resource name. A url that already is a
  // DBPedia resource url is returned unchanged.
  my._getDbpediaUrl = function(url) {
    var resourceBase = 'http://dbpedia.org/resource/';
    // Compare against -1 / 0 explicitly: indexOf returns -1 (truthy) when the
    // text is absent and 0 (falsy) when the input starts with it.
    if (url.indexOf(resourceBase) === 0) {
      return url;
    }
    var parts = url.split('/');
    var title = parts[parts.length - 1];
    return resourceBase + title;
  };

  // ### getRawJSON
  //
  // get raw RDF JSON for DBPedia resource from DBPedia SPARQL endpoint
  my.getRawJSON = function(url, callback, error) {
    // Plain concatenation: String.replace would expand `$$`, `$&`, ... if
    // they occur in the url.
    var sparqlQuery = 'DESCRIBE <' + url + '>';
    $.ajax({
      url: my.endpoint,
      data: {
        query: sparqlQuery,
        // format: 'application/x-json+ld'
        format: 'application/rdf+json'
      },
      // make sure the response is handed to `callback` as a parsed object
      dataType: 'json',
      success: callback,
      error: error
    });
  };

  // Standard RDF namespace prefixes for use in lookupProperty function
  my.PREFIX = {
    rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs: "http://www.w3.org/2000/01/rdf-schema#",
    xsd: "http://www.w3.org/2001/XMLSchema#",
    owl: "http://www.w3.org/2002/07/owl#",
    dc: "http://purl.org/dc/terms/",
    foaf: "http://xmlns.com/foaf/0.1/",
    vcard: "http://www.w3.org/2006/vcard/ns#",
    dbp: "http://dbpedia.org/property/",
    dbo: "http://dbpedia.org/ontology/",
    geo: "http://www.geonames.org/ontology#",
    wgs: "http://www.w3.org/2003/01/geo/wgs84_pos#"
  };

  // ### lookupProperty
  //
  // lookup a property value given a standard RDF/JSON property dictionary
  // e.g. something like ...
  //
  //     ...
  //     "http://dbpedia.org/property/regent": [
  //            {
  //                "type": "uri",
  //                "value": "http://dbpedia.org/resource/Richard_I_of_England"
  //            }
  //        ],
  //     ...
  //
  // `property` may use one of the prefixes in PREFIX (e.g. 'rdfs:label').
  // Returns the first matching value, or undefined when there is none.
  my._lookupProperty = function(dict, property) {
    if (!dict) {
      return undefined;
    }
    // first expand namespace
    for (var key in my.PREFIX) {
      if (Object.prototype.hasOwnProperty.call(my.PREFIX, key) &&
          property.indexOf(key + ':') === 0) {
        property = my.PREFIX[key] + property.slice(key.length + 1);
        break;
      }
    }
    var values = dict[property] || [];
    for (var idx = 0; idx < values.length; idx++) {
      // only take english values if lang is present
      if (!values[idx].lang || values[idx].lang === 'en') {
        return values[idx].value;
      }
    }
    return undefined;
  };

  // ### extractSummary
  //
  // Extract a standard set of attributes (e.g. title, description, dates etc
  // etc) from rdfJson and the given subject uri (url) e.g.
  //
  //     extractSummary('http://dbpedia.org/resource/Rufus_Pollock', rdfJson object from dbpedia)
  //
  // A subject that is missing from rdfJson yields a summary whose lookups are
  // all empty instead of throwing.
  my.extractSummary = function(subjectUri, rdfJson) {
    var properties = (rdfJson && rdfJson[subjectUri]) || {};

    // Look up one property, or (for an array) every property that has a
    // value, in the order given.
    function lkup(attribs) {
      if (attribs instanceof Array) {
        var out = [];
        for (var i = 0; i < attribs.length; i++) {
          var found = my._lookupProperty(properties, attribs[i]);
          if (found) {
            out.push(found);
          }
        }
        return out;
      }
      return my._lookupProperty(properties, attribs);
    }

    var summaryInfo = {
      title: lkup('rdfs:label'),
      description: lkup('dbo:abstract'),
      summary: lkup('rdfs:comment'),
      birthDate: lkup('dbp:birthDate'),
      deathDate: lkup('dbp:deathDate'),
      // both dbp:date and dbo:date are usually present but dbp:date is
      // frequently "bad" (e.g. just a single integer rather than a date)
      // whereas ontology value is better
      date: lkup('dbo:date'),
      place: lkup('dbp:place'),
      // prefix is `dbp` (see PREFIX); a misspelt prefix is never expanded
      birthPlace: lkup('dbp:birthPlace'),
      deathPlace: lkup('dbp:deathPlace'),
      source: lkup('foaf:page'),
      images: lkup(['dbo:thumbnail', 'foaf:depiction', 'foaf:img']),
      location: {
        lat: lkup('wgs:lat'),
        // the WGS84 vocabulary names the longitude property `long`
        lon: lkup('wgs:long')
      }
    };

    summaryInfo.start = summaryInfo.birthDate || summaryInfo.date;
    summaryInfo.end = summaryInfo.deathDate;
    summaryInfo.location.title = summaryInfo.place || summaryInfo.birthPlace ||
      summaryInfo.deathPlace;
    // `images` is always an array, so test its length rather than truthiness
    summaryInfo.image = summaryInfo.images.length > 0 ? summaryInfo.images[0] : null;

    return summaryInfo;
  };

  return my;
}();
133 | 
134 | 
-------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Wikipedia JS 6 | 7 | 8 | 9 | 27 | 28 | 29 | 55 | 56 |
57 | 63 |

The Library

64 |
65 |
66 |

Get the code

67 |

You can grab the (incredibly lightweight) wikipedia.js library from here. 68 |

Want to browse the annotated source?

69 |

The library is the work of Open Knowledge Foundation Labs and Rufus Pollock in particular. It is, in essence, a small wrapper around the data and APIs of the DBPedia project and it is they who have done all the heavy lifting of extracting structured data from Wikipedia - huge credit and thanks to DBPedia folks!

70 |
71 |
72 |

Using it

73 |
// first include the script
 74 | <script src='http://okfnlabs.org/wikipediajs/wikipedia.js'></script>
 75 | WIKIPEDIA.getData('http://en.wikipedia.org/wiki/Invasion_of_Normandy', function(info) {
 76 |   alert(info.summary.title); console.log(info);
 77 | });
78 |

Or, check out the more detailed demo below ...

79 |
80 |
81 | 82 |

A Demo

83 |
84 | 85 | 86 | 87 |
88 | 89 |

This simple demo uses wikipedia.js library to load information on the given page and display it below. The JS source for the demo is here while the HTML source is just this page!

90 | 91 | 92 | 95 | 96 | 134 |
// -----------------------------------------------------------------------------
// /js/app.js
// -----------------------------------------------------------------------------
// Demo page logic, run on DOM ready: when the page URL carries a `url` query
// parameter, look it up through WIKIPEDIA.getData and render the result.
jQuery(function() {
  var q = parseQueryString(window.location.search);
  if (q.url) {
    $('input[name="url"]').val(q.url);

    $('.loading').show();

    // Success callback for WIKIPEDIA.getData: fills in the summary fields, the
    // list of raw property names and the raw JSON, then scrolls to the demo.
    var display = function(info) {
      $('.loading').hide();

      // A failed lookup arrives here too, flagged by `error` / a null summary.
      if (info.error || !info.summary) {
        alert(info.error || 'No data found for ' + q.url);
        return;
      }
      $('.results').show();

      var rawData = info.raw;
      var summaryInfo = info.summary;
      var properties = rawData ? rawData[info.dbpediaUrl] : undefined;
      var key;

      // each summary field is written into the element with the same class
      for (key in summaryInfo) {
        $('.summary .' + key).text(summaryInfo[key]);
      }
      $('.summary .thumbnail').attr('src', summaryInfo.image);
      $('.summary .raw').val(JSON.stringify(summaryInfo, null, ' '));

      // Raw Data Summary
      var count = 0;
      for (key in properties) {
        count += 1;
        // keys come from the remote response: insert them as text, since
        // .append() with a string would parse it as HTML
        $('.data-summary .properties').append(document.createTextNode(key + '\n'));
      }
      $('.data-summary .count').text(count);

      // raw JSON
      $('.results-json').val(JSON.stringify(rawData, null, ' '));

      $('html,body').animate({
          scrollTop: $('#demo').offset().top
        },
        'slow'
      );
    };

    // jQuery calls the ajax error handler with (jqXHR, textStatus, errorThrown);
    // alerting the first argument alone would only show "[object Object]".
    WIKIPEDIA.getData(q.url, display, function(jqXHR, textStatus, errorThrown) {
      $('.loading').hide();
      alert('Request to DBPedia failed: ' + (errorThrown || textStatus || 'unknown error'));
    });
  }

  $('.js-data-summary').click(function(e) {
    $('.data-summary').show();
  });
});

// TODO: search of wikipedia
// http://en.wikipedia.org/w/api.php?action=query&format=json&callback=test&list=search&srsearch=%richard%

// Parse a URL query string (?xyz=abc...) into a dictionary.
// Keys and values are URL-decoded ('+' counts as a space). When a key is
// repeated the last occurrence wins.
// TODO: have values be array as query string allow repetition of keys
//
// Declared as a function (not assigned to an undeclared name) so it is hoisted
// and does not rely on implicit global creation.
function parseQueryString(q) {
  var urlParams = {};
  if (!q) {
    return urlParams;
  }
  var query = q.charAt(0) === '?' ? q.slice(1) : q;

  // decodeURIComponent handles UTF-8 escapes correctly but throws on malformed
  // input; fall back to the legacy decoder (or the raw text) in that case.
  function decode(s) {
    var spaced = s.replace(/\+/g, ' ');
    try {
      return decodeURIComponent(spaced);
    } catch (e) {
      return typeof unescape === 'function' ? unescape(spaced) : spaced;
    }
  }

  var pairPattern = /([^&=]+)=?([^&]*)/g;
  var pair;
  while ((pair = pairPattern.exec(query)) !== null) {
    urlParams[decode(pair[1])] = decode(pair[2]);
  }
  return urlParams;
}

// -----------------------------------------------------------------------------
// /wikipedia.js
// -----------------------------------------------------------------------------
// Small wrapper around the DBPedia SPARQL endpoint: turns a Wikipedia URL or
// page name into a DBPedia resource URL, fetches its RDF/JSON description and
// extracts a summary of commonly used properties. Requires jQuery (`$`).
var WIKIPEDIA = function() {
  var my = {};

  // DBPedia SPARQL endpoint
  my.endpoint = 'http://dbpedia.org/sparql/';

  // ### getData
  //
  // Return structured information (via callback) on the provided Wikipedia URL by querying
  // the DBPedia SPARQL endpoint and then tidying the data up.
  //
  // @param: wikipediaUrlOrPageName. A wikipedia URL or pageName or an object
  // with following structure:
  //
  //     {
  //       url: wikipediaURLOrPageName,
  //       raw: false // do not include the raw data in what is returned
  //     }
  //
  // @return: Data is returned in the form of the following hash:
  //
  //     {
  //       raw: the-raw-json-from-dbpedia,
  //       summary: a-cleaned-up-set-of-the-properties (see extractSummary),
  //       dbpediaUrl: dbpedia-resource-url e.g. http://dbpedia.org/resource/World_War_II
  //     }
  //
  // When nothing could be retrieved for the resource, `summary` is null and
  // an `error` message is set on the hash.
  //
  // Function is asynchronous as we have to call out to DBPedia to get the
  // info: the hash is passed to `callback`, while `error` is handed to
  // $.ajax as its error handler.
  my.getData = function(wikipediaUrlOrPageName, callback, error) {
    var target = wikipediaUrlOrPageName;
    var includeRaw = true;
    // the documented object form: { url: ..., raw: false }
    if (target !== null && typeof target === 'object') {
      includeRaw = target.raw !== false;
      target = target.url;
    }
    var url = my._getDbpediaUrl(target);
    function onSuccess(data) {
      var out = {
        dbpediaUrl: url,
        summary: null
      };
      if (includeRaw) {
        out.raw = data;
      }
      // the response is keyed by subject uri; without an entry for our
      // resource there is nothing to summarise
      if (data && data[url]) {
        out.summary = my.extractSummary(url, data);
      } else {
        out.error = 'Failed to retrieve data. Is the URL or page name correct?';
      }
      callback(out);
    }
    my.getRawJson(url, onSuccess, error);
  };

  // ### _getDbpediaUrl
  //
  // Convert the incoming URL or page name to a DBPedia url
  my._getDbpediaUrl = function(url) {
    var resourceBase = 'http://dbpedia.org/resource/';
    if (url.indexOf('wikipedia') !== -1) {
      // drop a section anchor (#...) before taking the last path segment
      var parts = url.replace(/#.*$/, '').split('/');
      return resourceBase + parts[parts.length - 1];
    }
    if (url.indexOf('dbpedia.org') !== -1) {
      return url;
    }
    // a plain page name: spaces become underscores in resource names
    return resourceBase + url.replace(/ /g, '_');
  };

  // ### getRawJson
  //
  // get raw RDF JSON for DBPedia resource from DBPedia SPARQL endpoint
  my.getRawJson = function(url, callback, error) {
    // Plain concatenation: String.replace would expand `$$`, `$&`, ... if
    // they occur in the url.
    var sparqlQuery = 'DESCRIBE <' + url + '>';
    $.ajax({
      url: my.endpoint,
      data: {
        query: sparqlQuery,
        // format: 'application/x-json+ld'
        format: 'application/rdf+json'
      },
      dataType: 'json',
      success: callback,
      error: error
    });
  };

  // Standard RDF namespace prefixes for use in lookupProperty function
  my.PREFIX = {
    rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs: "http://www.w3.org/2000/01/rdf-schema#",
    xsd: "http://www.w3.org/2001/XMLSchema#",
    owl: "http://www.w3.org/2002/07/owl#",
    dc: "http://purl.org/dc/terms/",
    foaf: "http://xmlns.com/foaf/0.1/",
    vcard: "http://www.w3.org/2006/vcard/ns#",
    dbp: "http://dbpedia.org/property/",
    dbo: "http://dbpedia.org/ontology/",
    geo: "http://www.geonames.org/ontology#",
    wgs: "http://www.w3.org/2003/01/geo/wgs84_pos#"
  };

  // Expand a prefixed name such as 'rdfs:label' into the full uri using
  // PREFIX. Input without a known prefix is returned unchanged.
  my._expandNamespacePrefix = function(uriWithPrefix) {
    for (var key in my.PREFIX) {
      if (Object.prototype.hasOwnProperty.call(my.PREFIX, key) &&
          uriWithPrefix.indexOf(key + ':') === 0) {
        return my.PREFIX[key] + uriWithPrefix.slice(key.length + 1);
      }
    }
    return uriWithPrefix;
  };

  // ### lookupProperty
  //
  // lookup a property value given a standard RDF/JSON property dictionary
  // e.g. something like ...
  //
  //     ...
  //     "http://dbpedia.org/property/regent": [
  //            {
  //                "type": "uri",
  //                "value": "http://dbpedia.org/resource/Richard_I_of_England"
  //            }
  //        ],
  //     ...
  //
  // Returns the first matching value, or undefined when there is none.
  my._lookupProperty = function(dict, property) {
    if (!dict) {
      return undefined;
    }
    var values = dict[my._expandNamespacePrefix(property)] || [];
    for (var idx = 0; idx < values.length; idx++) {
      // only take english values if lang is present
      if (!values[idx].lang || values[idx].lang === 'en') {
        return values[idx].value;
      }
    }
    return undefined;
  };

  // ### extractSummary
  //
  // Extract a standard set of attributes (e.g. title, description, dates etc
  // etc) from rdfJson and the given subject uri (url) e.g.
  //
  //     extractSummary('http://dbpedia.org/resource/Rufus_Pollock', rdfJson object from dbpedia)
  //
  // A subject that is missing from rdfJson yields a summary whose lookups are
  // all empty instead of throwing.
  my.extractSummary = function(subjectUri, rdfJson) {
    var properties = (rdfJson && rdfJson[subjectUri]) || {};

    // Look up one property, or (for an array) every property that has a
    // value, in the order given.
    function lkup(attribs) {
      if (attribs instanceof Array) {
        var out = [];
        for (var i = 0; i < attribs.length; i++) {
          var found = my._lookupProperty(properties, attribs[i]);
          if (found) {
            out.push(found);
          }
        }
        return out;
      }
      return my._lookupProperty(properties, attribs);
    }

    var summaryInfo = {
      title: lkup('rdfs:label'),
      description: lkup('dbo:abstract'),
      summary: lkup('rdfs:comment'),
      startDates: lkup(['dbo:birthDate', 'dbo:formationDate', 'dbo:foundingYear']),
      // NOTE: despite the plural name this is a single value (or undefined)
      endDates: lkup('dbo:deathDate'),
      // both dbp:date and dbo:date are usually present but dbp:date is
      // frequently "bad" (e.g. just a single integer rather than a date)
      // whereas ontology value is better
      date: lkup('dbo:date'),
      place: lkup('dbp:place'),
      birthPlace: lkup('dbo:birthPlace'),
      deathPlace: lkup('dbo:deathPlace'),
      source: lkup('foaf:page'),
      images: lkup(['dbo:thumbnail', 'foaf:depiction', 'foaf:img']),
      location: {
        lat: lkup('wgs:lat'),
        lon: lkup('wgs:long')
      },
      types: [],
      type: null
    };

    // getLastPartOfUrl
    function gl(url) {
      var parts = String(url).split('/');
      return parts[parts.length - 1];
    }

    var typeObjs = properties[my._expandNamespacePrefix('rdf:type')] || [];
    for (var t = 0; t < typeObjs.length; t++) {
      var value = String(typeObjs[t].value);
      // let's be selective
      // ignore yago and owl stuff
      if (value.indexOf('dbpedia.org/ontology') !== -1 ||
          value.indexOf('schema.org') !== -1 ||
          value.indexOf('foaf/0.1') !== -1) {
        var typeName = gl(value);
        // keep each type name only once
        if (summaryInfo.types.indexOf(typeName) === -1) {
          summaryInfo.types.push(typeName);
        }
        // use schema.org value as the default
        if (value.indexOf('schema.org') !== -1) {
          summaryInfo.type = typeName;
        }
      }
    }
    if (!summaryInfo.type && summaryInfo.types.length > 0) {
      summaryInfo.type = summaryInfo.types[0];
    }

    summaryInfo.start = summaryInfo.startDates.length > 0 ? summaryInfo.startDates[0] : summaryInfo.date;
    summaryInfo.end = summaryInfo.endDates;
    if (!summaryInfo.place) {
      // death place is more likely more significant than birth place
      summaryInfo.place = summaryInfo.deathPlace || summaryInfo.birthPlace;
    }
    // if place a uri clean it up ...
    if (summaryInfo.place) {
      summaryInfo.place = gl(summaryInfo.place);
    }
    summaryInfo.location.title = summaryInfo.place;
    // `images` is always an array, so test its length rather than truthiness
    summaryInfo.image = summaryInfo.images.length > 0 ? summaryInfo.images[0] : null;

    return summaryInfo;
  };

  return my;
}();