├── faq.css ├── .gitignore ├── h1-p ├── gene.dev.head ├── gene.key.head ├── gene.train.head ├── gene.train.head2 ├── hash.js ├── tag.html ├── HashSpec.js ├── emission_count.html ├── index.html ├── viterbi.html ├── count.html ├── submit.py ├── count_freqs.py ├── viterbi.js ├── ViterbiSpec.js └── eval_gene_tagger.py ├── mergeupstream.sh ├── thirdparty ├── jasmine-standalone-1.3.1 │ ├── .DS_Store │ ├── src │ │ ├── Song.js │ │ └── Player.js │ ├── spec │ │ ├── SpecHelper.js │ │ └── PlayerSpec.js │ └── lib │ │ └── jasmine-1.3.1 │ │ ├── MIT.LICENSE │ │ └── jasmine.css ├── jquery.rules.rdfs.js ├── jquery.rdf.json.js ├── qunit-1.10.0.css ├── jquery.icndb.js ├── removeStopWords.js ├── jquery.uri.js ├── jquery.xmlns.js ├── jquery.curie.js ├── jquery.datatype.js ├── jquery.rules.js └── jquery.rdf.xml.js ├── initial_kb.txt ├── faq.js ├── index.html ├── tests.js ├── qunit.html ├── faq.html ├── jasmine.html ├── storageSpec.js ├── README.md ├── querySpec.js ├── storage.js └── query.js /faq.css: -------------------------------------------------------------------------------- 1 | #history{ 2 | width:100%; 3 | height:100%; 4 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | h1-p/gene_test.p1.out 3 | h1-p/*.pdf 4 | joke.json 5 | */.DS_Store 6 | *~ 7 | -------------------------------------------------------------------------------- /h1-p/gene.dev.head: -------------------------------------------------------------------------------- 1 | BACKGROUND 2 | : 3 | Ischemic 4 | heart 5 | disease 6 | is 7 | the 8 | primary 9 | cause 10 | of 11 | -------------------------------------------------------------------------------- /mergeupstream.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | git fetch upstream 3 | git checkout master 4 | git merge upstream/master 5 | git push origin master -------------------------------------------------------------------------------- /thirdparty/jasmine-standalone-1.3.1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tansaku/faqbot/HEAD/thirdparty/jasmine-standalone-1.3.1/.DS_Store -------------------------------------------------------------------------------- /h1-p/gene.key.head: -------------------------------------------------------------------------------- 1 | BACKGROUND O 2 | : O 3 | Ischemic O 4 | heart O 5 | disease O 6 | is O 7 | the O 8 | primary O 9 | cause O 10 | of O 11 | -------------------------------------------------------------------------------- /h1-p/gene.train.head: -------------------------------------------------------------------------------- 1 | Comparison O 2 | Comparison O 3 | with O 4 | alkaline I-GENE 5 | phosphatases I-GENE 6 | and O 7 | 5 I-GENE 8 | - I-GENE 9 | nucleotidase I-GENE 10 | 11 | Pharmacologic O 12 | -------------------------------------------------------------------------------- /thirdparty/jasmine-standalone-1.3.1/src/Song.js: -------------------------------------------------------------------------------- 1 | function Song() { 2 | } 3 | 4 | Song.prototype.persistFavoriteStatus = function(value) { 5 | // something complicated 6 | throw new Error("not yet implemented"); 7 | }; -------------------------------------------------------------------------------- /thirdparty/jasmine-standalone-1.3.1/spec/SpecHelper.js: -------------------------------------------------------------------------------- 1 | beforeEach(function() { 2 | this.addMatchers({ 3 | toBePlaying: function(expectedSong) { 4 | var player = this.actual; 5 | return player.currentlyPlayingSong === expectedSong && 6 | player.isPlaying; 7 | } 8 | }); 9 | }); 10 | 
-------------------------------------------------------------------------------- /initial_kb.txt: -------------------------------------------------------------------------------- 1 | # We'll use the FOAF vocabularly to represent people 2 | # http://en.wikipedia.org/wiki/FOAF_%28software%29 3 | @prefix foaf: . 4 | @prefix dc: . 5 | @prefix dct: . 6 | _:sam a foaf:Person ; foaf:name "Sam Joseph" . 7 | _:dave a foaf:Person ; foaf:name "Dave Snowdon" . 8 | -------------------------------------------------------------------------------- /faq.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function () { 2 | $("input#sentence").keypress(function(event) { 3 | if (event.which == 13) { 4 | event.preventDefault(); 5 | $("form#chat").submit(); 6 | } 7 | }); 8 | 9 | $("form#chat").submit(function () { 10 | return handleChat($("input#sentence").val()); 11 | }); 12 | showTranscript(storage); 13 | }); 14 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | Page Redirection 10 | 11 | 12 | 13 | If you are not redirected automatically, follow the link to the faqbot 14 | 15 | -------------------------------------------------------------------------------- /thirdparty/jasmine-standalone-1.3.1/src/Player.js: -------------------------------------------------------------------------------- 1 | function Player() { 2 | } 3 | Player.prototype.play = function(song) { 4 | this.currentlyPlayingSong = song; 5 | this.isPlaying = true; 6 | }; 7 | 8 | Player.prototype.pause = function() { 9 | this.isPlaying = false; 10 | }; 11 | 12 | Player.prototype.resume = function() { 13 | if (this.isPlaying) { 14 | throw new Error("song is already playing"); 15 | } 16 | 17 | this.isPlaying = true; 18 | }; 19 | 20 | Player.prototype.makeFavorite = function() { 21 | 
/**
 * Nested hash addressed by a key path (an array of keys), with a default
 * value returned for anything that has not been stored.
 *
 * Only *own* properties of each level are consulted, so keys that collide with
 * names inherited from Object.prototype ("constructor", "toString",
 * "__proto__", ...) behave like any other key: they read as the default until
 * they are set, and setting them never touches Object.prototype.
 *
 * @param {Object} [starting_state] Object used (by reference, not copied) as
 *     the initial contents. Defaults to a fresh empty object.
 * @param {*} [default_return] Value reported for missing entries. Defaults to 0.
 */
function Hash(starting_state, default_return) {
  this.default = default_return !== undefined ? default_return : 0;
  this.hash = starting_state !== undefined ? starting_state : {};

  var hasOwn = Object.prototype.hasOwnProperty;

  // A missing path argument is treated as the empty path, which addresses the
  // whole hash (this is what the original for...in loop did as well).
  function toPath(array) {
    return array == null ? [] : array;
  }

  // True for values that can hold nested keys (objects and arrays).
  function isContainer(node) {
    return node !== null && typeof node === 'object';
  }

  // Value stored under `key` directly on `node`; undefined when `node` cannot
  // hold keys or the key is only inherited.
  function readOwn(node, key) {
    if (!isContainer(node) || !hasOwn.call(node, key)) {
      return undefined;
    }
    return node[key];
  }

  // Store `value` as an own data property. defineProperty is used instead of
  // assignment so that the key "__proto__" creates a real entry rather than
  // replacing the node's prototype.
  function writeOwn(node, key, value) {
    Object.defineProperty(node, key, {
      value: value,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }

  /**
   * Look up the value stored at a key path.
   * @param {Array} array Key path, outermost key first.
   * @returns {*} The stored value, the default if any step of the path is
   *     missing, or the whole underlying object for an empty path.
   */
  this.get = function (array) {
    var path = toPath(array);
    var node = this.hash;
    for (var i = 0; i < path.length; i++) {
      node = readOwn(node, path[i]);
      if (node === undefined) {
        return this.default;
      }
    }
    return node;
  };

  /**
   * Reset the entry at a key path to the default value. The key itself stays
   * present in the underlying object, holding the default.
   * @param {Array} array Key path, outermost key first.
   * @returns {*} The default value (or the whole underlying object for an
   *     empty path).
   */
  this.delete = function (array) {
    var path = toPath(array);
    var last = path.length - 1;
    var node = this.hash;
    for (var i = 0; i < path.length; i++) {
      var key = path[i];
      if (readOwn(node, key) === undefined) {
        return this.default;
      }
      if (i === last) {
        writeOwn(node, key, this.default);
      }
      node = node[key];
    }
    return node;
  };

  /**
   * Store a value at a key path, creating intermediate levels as needed. An
   * intermediate entry that is missing, null or a primitive is replaced by a
   * fresh object so the value is never silently dropped.
   * @param {Array} array Key path, outermost key first.
   * @param {*} value Value to store.
   * @returns {*} The stored value (or the whole underlying object for an
   *     empty path, in which case nothing is stored).
   */
  this.set = function (array, value) {
    var path = toPath(array);
    var last = path.length - 1;
    var node = this.hash;
    for (var i = 0; i < path.length; i++) {
      var key = path[i];
      if (i === last) {
        writeOwn(node, key, value);
        return value;
      }
      var child = readOwn(node, key);
      if (!isContainer(child)) {
        child = {};
        writeOwn(node, key, child);
      }
      node = child;
    }
    return node;
  };
}
// QUnit tests for the faqbot query() entry point and the bundled `natural`
// library. QUnit, query and natural are globals loaded by qunit.html.

test( "a basic test example", function() {
  var value = "hello";
  equal( value, "hello", "We expect value to be hello" );
});

test( "testing NLP", function() {
  // Without the word "called" the sentence is not understood as a statement.
  var sentence = "There is a game engine Unreal Engine";
  var result = query(sentence);
  equal( result, "what was that?");
  // "game_engines", "Unreal Engine", {"name":"Unreal Engine","ident":"Unreal Engine"})
});

// Statement sentences and the reply expected for each, matched by index.
var sentences = [];
var answers = [];
sentences[0] = "There is a game engine called Unreal Engine";
answers[0] = "Unreal Engine is a game engine";
sentences[1] = "There is a horse called Matilda";
answers[1] = "Matilda is a horse";
sentences[2] = "There is a course called ML";
answers[2] = "ML is a course";

// Register one test per pair. QUnit runs the callbacks after this script has
// finished, so each callback needs its own copy of the pair: with a shared
// loop variable every test would end up checking the last pair only.
sentences.forEach(function (sentence, index) {
  var answer = answers[index];
  test( "\"" + sentence + "\" --> \"" + answer + "\"", function() {
    var result = query(sentence);
    equal( result, answer);
  });
});

test( "testing Natural", function() {
  var result = natural.SoundEx.compare('phone', 'pone');
  equal( result, true, "We expect value to be true" );
});

20 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /qunit.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | QUnit Tests for FaqBot 6 | 7 | 8 | 9 |
10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /h1-p/HashSpec.js: -------------------------------------------------------------------------------- 1 | describe("Hash", function() { 2 | var DEFAULT = 0; 3 | 4 | var hash; 5 | 6 | beforeEach(function() { 7 | hash = new Hash(); 8 | }); 9 | 10 | describe("starting state", function () { 11 | it("should have starting state key/value pairs we pass in", function () { 12 | hash = new Hash({'1':'a','2':'b','3':'c'}, DEFAULT); 13 | expect(hash.get(['1'])).toEqual('a'); 14 | expect(hash.get(['2'])).toEqual('b'); 15 | expect(hash.get(['3'])).toEqual('c'); 16 | expect(hash.get(['4'])).toEqual(DEFAULT); 17 | }); 18 | }); 19 | describe("get", function () { 20 | it("should get the default value", function () { 21 | expect(hash.get(['a'])).toEqual(DEFAULT); 22 | }); 23 | it("should get the default value even when requesting nested hashes", function () { 24 | expect(hash.get(['a','b'])).toEqual(DEFAULT); 25 | }); 26 | }); 27 | // TODO should check we throw an exception if get, delete or set is queried with something other than an array 28 | describe("delete", function () { 29 | it("should get the delete the appropriate value", function () { 30 | var value = 12; 31 | hash.set(['a'],value); 32 | expect(hash.get(['a'])).toEqual(value); 33 | hash.delete(['a']); 34 | expect(hash.get(['a'])).toEqual(DEFAULT); 35 | }); 36 | 37 | }); 38 | describe("set", function () { 39 | it("should get the set value", function () { 40 | var value = 12; 41 | hash.set(['a'],value); 42 | expect(hash.get(['a'])).toEqual(value); 43 | }); 44 | it("should get the default value even when requesting nested hashes", function () { 45 | var value = 12; 46 | hash.set(['a','b'],value); 47 | expect(hash.get(['a','b'])).toEqual(value); 48 | }); 49 | }); 50 | }); 51 | 
-------------------------------------------------------------------------------- /h1-p/emission_count.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Count Stuff 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |

22 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /h1-p/index.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Jasmine Spec Runner 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /h1-p/viterbi.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Viterbi Algorithm 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |

20 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /thirdparty/jquery.rules.rdfs.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery RDF Ontology @VERSION 3 | * 4 | * Copyright (c) 2009 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | * Depends: 8 | * jquery.uri.js 9 | * jquery.xmlns.js 10 | * jquery.datatype.js 11 | * jquery.curie.js 12 | * jquery.rdf.js 13 | * jquery.rules.js 14 | */ 15 | /*global jQuery */ 16 | (function ($) { 17 | 18 | var 19 | nsRdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 20 | nsRdfs = "http://www.w3.org/2000/01/rdf-schema#"; 21 | 22 | $.rdf.ruleset.rdfs = $.rdf.ruleset() 23 | .prefix('rdf', nsRdf) 24 | .prefix('rdfs', nsRdfs) 25 | .add('?subject ?property ?object', '?property a rdf:Property') 26 | .add('?property rdfs:range ?class', 27 | ['?property a rdf:Property', '?class a rdfs:Class']) 28 | .add(['?property rdfs:range ?class', '?subject ?property ?object'], 29 | '?object a ?class') 30 | .add('?property rdfs:domain ?class', 31 | ['?property a rdf:Property', '?class a rdfs:Class']) 32 | .add(['?property rdfs:domain ?class', '?subject ?property ?object'], 33 | '?subject a ?class') 34 | .add('?instance a ?class', '?class a rdfs:Class') 35 | .add('?subclass rdfs:subClassOf ?class', 36 | ['?subclass a rdfs:Class', '?class a rdfs:Class']) 37 | .add(['?subclass rdfs:subClassOf ?class', '?instance a ?subclass'], 38 | '?instance a ?class') 39 | .add('?subproperty rdfs:subPropertyOf ?property', 40 | ['?subproperty a rdf:Property', '?property a rdf:Property']) 41 | .add(['?subproperty rdfs:subPropertyOf ?property', '?subject ?subproperty ?object'], 42 | '?subject ?property ?object') 43 | .add('?statement rdf:subject ?resource', '?statement a rdf:Statement') 44 | .add('?statement rdf:predicate ?property', 45 | ['?statement a rdf:Statement', '?property a rdf:Property']) 46 | .add('?statement 
rdf:object ?resource', '?statement a rdf:Statement') 47 | .add(['?statement rdf:subject ?subject', '?statement rdf:predicate ?property', '?statement rdf:object ?object'], 48 | '?subject ?property ?object') 49 | .add('?subject rdfs:isDefinedBy ?object', '?subject rdfs:seeAlso ?object') 50 | 51 | })(jQuery); 52 | -------------------------------------------------------------------------------- /faq.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | FaqBot 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 |
[Note, to get the bot to store knowledge say things like "There is a person called John", then you can ask questions about 'John']
29 |
[This project is in early alpha. Check the Documentation ]
30 |
31 |
Bot: hello
32 |
33 | 34 |
35 |
36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /thirdparty/jquery.rdf.json.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery RDF @VERSION 3 | * 4 | * Copyright (c) 2008,2009 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | * Depends: 8 | * jquery.uri.js 9 | * jquery.xmlns.js 10 | * jquery.datatype.js 11 | * jquery.curie.js 12 | * jquery.rdf.js 13 | * jquery.json.js 14 | */ 15 | /** 16 | * @fileOverview jQuery RDF/JSON parser 17 | * @author Jeni Tennison 18 | * @copyright (c) 2008,2009 Jeni Tennison 19 | * @license MIT license (MIT-LICENSE.txt) 20 | * @version 1.0 21 | */ 22 | /** 23 | * @exports $ as jQuery 24 | */ 25 | /** 26 | * @ignore 27 | */ 28 | (function ($) { 29 | 30 | $.rdf.parsers['application/json'] = { 31 | parse: $.secureEvalJSON, 32 | serialize: $.toJSON, 33 | triples: function (data) { 34 | var s, subject, p, property, o, object, i, opts, triples = []; 35 | for (s in data) { 36 | subject = (s.substring(0, 2) === '_:') ? 
// Jasmine specs: the faqbot query() round trip, followed by the stock
// Player/Song example suite from the Jasmine standalone distribution.

describe("FaqBot", function() {
  var sentences = [];
  var answers = [];

  it("should respond as expected ", function() {
    expect(query("There is a game engine Unreal Engine")).toEqual("what was that?");
  });

  // Statement sentences and the reply expected for each, matched by index.
  sentences[0] = "There is a game engine called Unreal Engine";
  answers[0] = "Unreal Engine is a game engine";
  sentences[1] = "There is a horse called Matilda";
  answers[1] = "Matilda is a horse";
  sentences[2] = "There is a course called ML";
  answers[2] = "ML is a course";

  // Register one spec per pair. Spec bodies run after this describe block has
  // returned, so each body needs its own copy of the pair: with a shared loop
  // variable every spec would end up checking the last pair only.
  sentences.forEach(function (sentence, index) {
    var answer = answers[index];
    it( "should respond to \"" + sentence + "\" with --> \"" + answer + "\"", function() {
      expect(query(sentence)).toEqual(answer);
    });
  });

});

describe("Player", function() {
  var player;
  var song;

  beforeEach(function() {
    player = new Player();
    song = new Song();
  });

  it("should be able to play a Song", function() {
    player.play(song);
    expect(player.currentlyPlayingSong).toEqual(song);

    //demonstrates use of custom matcher
    expect(player).toBePlaying(song);
  });

  describe("when song has been paused", function() {
    beforeEach(function() {
      player.play(song);
      player.pause();
    });

    it("should indicate that the song is currently paused", function() {
      expect(player.isPlaying).toBeFalsy();

      // demonstrates use of 'not' with a custom matcher
      expect(player).not.toBePlaying(song);
    });

    it("should be possible to resume", function() {
      player.resume();
      expect(player.isPlaying).toBeTruthy();
      expect(player.currentlyPlayingSong).toEqual(song);
    });
  });

  // demonstrates use of spies to intercept and test method calls
  it("tells the current song if the user has made it a favorite", function() {
    spyOn(song, 'persistFavoriteStatus');

    player.play(song);
    player.makeFavorite();

    expect(song.persistFavoriteStatus).toHaveBeenCalledWith(true);
  });

  //demonstrates use of expected exceptions
  describe("#resume", function() {
    it("should throw an exception if song is already playing", function() {
      player.play(song);

      expect(function() {
        player.resume();
      }).toThrow("song is already playing");
    });
  });
});
Count Stuff 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |

21 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /jasmine.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | Jasmine Spec Runner 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /storageSpec.js: -------------------------------------------------------------------------------- 1 | describe("Storage", function() { 2 | var storage; 3 | 4 | beforeEach(function() { 5 | storage = getStorage(new TransientStorage()); 6 | }); 7 | 8 | afterEach(function() { 9 | 10 | }); 11 | 12 | 13 | 14 | it("should be able to get a non null storage", function() { 15 | expect(storage).not.toBeNull(); 16 | expect(storage).toBeDefined(); 17 | 18 | }); 19 | 20 | it("should be able to get a blank storage", function() { 21 | expect(storage).not.toBeNull(); 22 | expect(storage).toBeDefined(); 23 | expect(storage.backend instanceof TransientStorage).toBeTruthy(); 24 | expect(storage.databank).not.toBeNull(); 25 | expect(storage.databank).toBeDefined(); 26 | }); 27 | 28 | it("should be able to store properties relations", function() { 29 | var object = 'Unreal Engine'; 30 | var relation = 'website'; 31 | var name = 'http://unrealengine.com'; 32 | var real_name = "Unreal_Engine"; 33 | storage.storeProperty(object, relation, name); 34 | var result = storage.queryProperty(object, relation); 35 | expect(result.value).toEqual(name); 36 | }); 37 | 38 | 39 | it("should be able to retrieve all properties", function() { 40 | var data = [ { name: 'http://unrealengine.com', 41 | relation: 'website' }, 42 | { name: '3D', 43 | relation: 'type' } ]; 44 | var object = 'Unreal Engine'; 45 | var real_name = "Unreal_Engine"; 46 | for (var i in data) { 47 | 
storage.storeProperty(object, data[i].relation, data[i].name); 48 | } 49 | 50 | var result = storage.queryAllProperties(object); 51 | expect(result instanceof Array).toBeTruthy(); 52 | expect(result.length).toEqual(2); 53 | 54 | for (var i in data) { 55 | expect(result).toContain(data[i]); 56 | } 57 | }); 58 | 59 | it("should be able to query databank for properties and fail properly", function() { 60 | // TODO should add something to refresh databank between each test 61 | var object = 'flower'; 62 | var name = 'Bert'; 63 | var relation = 'colour'; 64 | var result = storage.queryProperty(name,relation); 65 | expect(result).toEqual(undefined); 66 | }); 67 | 68 | 69 | it("should be able to store named entities", function() { 70 | var object = 'robot'; 71 | var name = 'Robbie'; 72 | storage.storeEntity(object, name); 73 | var result = storage.queryEntity(name); 74 | expect(result.type).toEqual(object); 75 | }); 76 | 77 | it("should be able to query databank", function() { 78 | var object = 'robot'; 79 | var name = 'Robbie'; 80 | storage.storeEntity(object, name); 81 | var result = storage.queryEntity(name); 82 | expect(result.type).toEqual(object); 83 | }) 84 | 85 | it("should be able to query databank and fail properly", function() { 86 | // TODO should add something to refresh databank between each test 87 | var object = 'flower'; 88 | var name = 'Bert'; 89 | var result = storage.queryEntity(name); 90 | expect(result).toEqual(undefined); 91 | }); 92 | 93 | it("should persist the transcript", function(){ 94 | fail(); 95 | }); 96 | 97 | 98 | it("can be cleared", function() { 99 | var storage = getStorage(new TransientStorage()); 100 | expect(storage).not.toBeNull(); 101 | expect(storage).toBeDefined(); 102 | var object = 'robot'; 103 | var name = 'Robbie'; 104 | storage.storeEntity(object, name); 105 | var result = storage.queryEntity(name); 106 | expect(result.type).toEqual(object); 107 | storage.clearDatabank(); 108 | var result = storage.queryEntity(name); 109 | 
expect(result).toBeUndefined(); 110 | 111 | }); 112 | 113 | 114 | }); 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | faqbot 2 | ====== 3 | 4 | JavaScript chat bot to answer frequently asked questions (faq) 5 | 6 | Preview the bot here: [http://htmlpreview.github.com/?https://github.com/tansaku/faqbot/blob/master/faq.html](http://htmlpreview.github.com/?https://github.com/tansaku/faqbot/blob/master/faq.html) 7 | 8 | ---- 9 | 10 | Most Natural Language Processing (NLP) approaches seem to be focused on how to parse sentences - not on how to construct sentences that match some model of the world (notable exception is SHRDLU) 11 | 12 | Most Chatbots seem designed to have a fixed database of material (AIML), use regexp (Eliza), although some do learn and remember word occurrence probabilities (MegaHAL) 13 | 14 | There appears to be an interesting opportunity for a chatbot that made additional use of NLP techniques and a good persistence framework in order to be a repository for knowledge and provide support in online text chat environments. 15 | 16 | This project is aiming to create a chatbot that can have a knowledge base updated by individual users through a process of discussion, e.g. 17 | 18 | Human: There is a game engine Unity3D 19 | Bot: OK 20 | Human: What is Unity3D? 21 | Bot: Unity3D is a game engine 22 | Human: Unity3D has a URL of http://www.studica.com/unity 23 | Bot: OK 24 | Human: What is the URL for Unity3D? 25 | Bot: The URL for Unity3D is http://www.studica.com/unity 26 | 27 | Ideally all conversations will be stored in the cloud, but not rely on having to maintain a specific server. In the first instance Github Gists seem like a good idea. An authenticated Github user could have gists created associated with their own user account. Although ideally the chatbot won't be tied to a particular persistence framework. 
28 | 29 | This kind of system could have great application for online classes. It would be great if the faqbot's knowledge base could contain all the data related to an online class including individual assignment statuses, thus allowing interactions like this: 30 | 31 | Human: What's my next assignment? 32 | Bot: It's assignment 4 on prototyping a mobile interface [link] 33 | 34 | Having authenticated against something like a Google or Facebook login. 35 | 36 | Hubot and Skype are interesting related areas. 37 | 38 | We're trying to understand how hubot persists data between conversations (https://github.com/github/hubot/issues/373#issuecomment-11992999) and looks great in as much as hubot already has a skype plugin. Skype is interesting because many online classes use its text chat for communication, but it seems that a skype bot must run as a client on someone's computer. 39 | 40 | In the first instance we've decided to go with client side javascript since it can run in HTML pages, and give the most open possible access to the faqbot. Ideally we'd like to see simple HTML pages with the chatbot interface where authenticated users can have conversations like this: 41 | 42 | Human: There is a game engine Unreal Engine 43 | Bot: OK 44 | Human: What is Unreal Engine? 45 | Bot: Unreal Engine is a game 46 | Human: No, Unreal Engine is a game engine 47 | Bot: OK 48 | Human: What is Unreal Engine? 49 | Bot: Unreal Engine is a game engine 50 | 51 | where effectively the human is providing a specification of how the bot should be responding to a question. This specification should then be added automatically to the set of unit tests for the bot, so that it can be checked in future against subsequent changes in specification. 
52 | 53 | Open Issues: 54 | 55 | 1) need support for named entity recognition in NaturalJS 56 | 2) need to fix on a first HTML interface 57 | 3) would like to provide predictive text look up to help scaffold users chatting with bot 58 | 59 | Background 60 | 61 | The current design of this system arises from an earlier attempt to implement the same approach in Python with the NLTK. This worked fine, except that it was not straightforward to install support for the NLTK on a cloud hosting service like Heroku. Also it seemed that members of the project were unlikely to download and hack on a python project that meant getting set up with NLTK locally. It seemed that if we made available a pure HTML interface that didn't rely on having particular software libraries installed on a server, and that chatting to the bot would mirror the process of generating unit tests for its operation, then it might be easier to get more people involved. Ultimately the project can use any technology, but it makes sense to get some serious prototyping done by making simple versions of the system as accessible as possible. 62 | 63 | Running with chrome 64 | 65 | if running the faqbot with chrome locally from the filesystem, chrome will need to be launched with the --allow-file-access-from-files flag 66 | -------------------------------------------------------------------------------- /thirdparty/qunit-1.10.0.css: -------------------------------------------------------------------------------- 1 | /** 2 | * QUnit v1.10.0 - A JavaScript Unit Testing Framework 3 | * 4 | * http://qunitjs.com 5 | * 6 | * Copyright 2012 jQuery Foundation and other contributors 7 | * Released under the MIT license. 
8 | * http://jquery.org/license 9 | */ 10 | 11 | /** Font Family and Sizes */ 12 | 13 | #qunit-tests, #qunit-header, #qunit-banner, #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult { 14 | font-family: "Helvetica Neue Light", "HelveticaNeue-Light", "Helvetica Neue", Calibri, Helvetica, Arial, sans-serif; 15 | } 16 | 17 | #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult, #qunit-tests li { font-size: small; } 18 | #qunit-tests { font-size: smaller; } 19 | 20 | 21 | /** Resets */ 22 | 23 | #qunit-tests, #qunit-tests ol, #qunit-header, #qunit-banner, #qunit-userAgent, #qunit-testresult, #qunit-modulefilter { 24 | margin: 0; 25 | padding: 0; 26 | } 27 | 28 | 29 | /** Header */ 30 | 31 | #qunit-header { 32 | padding: 0.5em 0 0.5em 1em; 33 | 34 | color: #8699a4; 35 | background-color: #0d3349; 36 | 37 | font-size: 1.5em; 38 | line-height: 1em; 39 | font-weight: normal; 40 | 41 | border-radius: 5px 5px 0 0; 42 | -moz-border-radius: 5px 5px 0 0; 43 | -webkit-border-top-right-radius: 5px; 44 | -webkit-border-top-left-radius: 5px; 45 | } 46 | 47 | #qunit-header a { 48 | text-decoration: none; 49 | color: #c2ccd1; 50 | } 51 | 52 | #qunit-header a:hover, 53 | #qunit-header a:focus { 54 | color: #fff; 55 | } 56 | 57 | #qunit-testrunner-toolbar label { 58 | display: inline-block; 59 | padding: 0 .5em 0 .1em; 60 | } 61 | 62 | #qunit-banner { 63 | height: 5px; 64 | } 65 | 66 | #qunit-testrunner-toolbar { 67 | padding: 0.5em 0 0.5em 2em; 68 | color: #5E740B; 69 | background-color: #eee; 70 | overflow: hidden; 71 | } 72 | 73 | #qunit-userAgent { 74 | padding: 0.5em 0 0.5em 2.5em; 75 | background-color: #2b81af; 76 | color: #fff; 77 | text-shadow: rgba(0, 0, 0, 0.5) 2px 2px 1px; 78 | } 79 | 80 | #qunit-modulefilter-container { 81 | float: right; 82 | } 83 | 84 | /** Tests: Pass/Fail */ 85 | 86 | #qunit-tests { 87 | list-style-position: inside; 88 | } 89 | 90 | #qunit-tests li { 91 | padding: 0.4em 0.5em 0.4em 2.5em; 92 | border-bottom: 1px solid #fff; 93 | 
list-style-position: inside; 94 | } 95 | 96 | #qunit-tests.hidepass li.pass, #qunit-tests.hidepass li.running { 97 | display: none; 98 | } 99 | 100 | #qunit-tests li strong { 101 | cursor: pointer; 102 | } 103 | 104 | #qunit-tests li a { 105 | padding: 0.5em; 106 | color: #c2ccd1; 107 | text-decoration: none; 108 | } 109 | #qunit-tests li a:hover, 110 | #qunit-tests li a:focus { 111 | color: #000; 112 | } 113 | 114 | #qunit-tests ol { 115 | margin-top: 0.5em; 116 | padding: 0.5em; 117 | 118 | background-color: #fff; 119 | 120 | border-radius: 5px; 121 | -moz-border-radius: 5px; 122 | -webkit-border-radius: 5px; 123 | } 124 | 125 | #qunit-tests table { 126 | border-collapse: collapse; 127 | margin-top: .2em; 128 | } 129 | 130 | #qunit-tests th { 131 | text-align: right; 132 | vertical-align: top; 133 | padding: 0 .5em 0 0; 134 | } 135 | 136 | #qunit-tests td { 137 | vertical-align: top; 138 | } 139 | 140 | #qunit-tests pre { 141 | margin: 0; 142 | white-space: pre-wrap; 143 | word-wrap: break-word; 144 | } 145 | 146 | #qunit-tests del { 147 | background-color: #e0f2be; 148 | color: #374e0c; 149 | text-decoration: none; 150 | } 151 | 152 | #qunit-tests ins { 153 | background-color: #ffcaca; 154 | color: #500; 155 | text-decoration: none; 156 | } 157 | 158 | /*** Test Counts */ 159 | 160 | #qunit-tests b.counts { color: black; } 161 | #qunit-tests b.passed { color: #5E740B; } 162 | #qunit-tests b.failed { color: #710909; } 163 | 164 | #qunit-tests li li { 165 | padding: 5px; 166 | background-color: #fff; 167 | border-bottom: none; 168 | list-style-position: inside; 169 | } 170 | 171 | /*** Passing Styles */ 172 | 173 | #qunit-tests li li.pass { 174 | color: #3c510c; 175 | background-color: #fff; 176 | border-left: 10px solid #C6E746; 177 | } 178 | 179 | #qunit-tests .pass { color: #528CE0; background-color: #D2E0E6; } 180 | #qunit-tests .pass .test-name { color: #366097; } 181 | 182 | #qunit-tests .pass .test-actual, 183 | #qunit-tests .pass .test-expected { color: 
#999999; } 184 | 185 | #qunit-banner.qunit-pass { background-color: #C6E746; } 186 | 187 | /*** Failing Styles */ 188 | 189 | #qunit-tests li li.fail { 190 | color: #710909; 191 | background-color: #fff; 192 | border-left: 10px solid #EE5757; 193 | white-space: pre; 194 | } 195 | 196 | #qunit-tests > li:last-child { 197 | border-radius: 0 0 5px 5px; 198 | -moz-border-radius: 0 0 5px 5px; 199 | -webkit-border-bottom-right-radius: 5px; 200 | -webkit-border-bottom-left-radius: 5px; 201 | } 202 | 203 | #qunit-tests .fail { color: #000000; background-color: #EE5757; } 204 | #qunit-tests .fail .test-name, 205 | #qunit-tests .fail .module-name { color: #000000; } 206 | 207 | #qunit-tests .fail .test-actual { color: #EE5757; } 208 | #qunit-tests .fail .test-expected { color: green; } 209 | 210 | #qunit-banner.qunit-fail { background-color: #EE5757; } 211 | 212 | 213 | /** Result */ 214 | 215 | #qunit-testresult { 216 | padding: 0.5em 0.5em 0.5em 2.5em; 217 | 218 | color: #2b81af; 219 | background-color: #D2E0E6; 220 | 221 | border-bottom: 1px solid white; 222 | } 223 | #qunit-testresult .module-name { 224 | font-weight: bold; 225 | } 226 | 227 | /** Fixture */ 228 | 229 | #qunit-fixture { 230 | position: absolute; 231 | top: -10000px; 232 | left: -10000px; 233 | width: 1000px; 234 | height: 1000px; 235 | } 236 | -------------------------------------------------------------------------------- /h1-p/submit.py: -------------------------------------------------------------------------------- 1 | ### The only things you'll have to edit (unless you're porting this script over to a different language) 2 | ### are at the bottom of this file. 
3 | 4 | import urllib 5 | import urllib2 6 | import hashlib 7 | import random 8 | import email 9 | import email.message 10 | import email.encoders 11 | import StringIO 12 | import sys 13 | 14 | """""""""""""""""""" 15 | """""""""""""""""""" 16 | 17 | class NullDevice: 18 | def write(self, s): 19 | pass 20 | 21 | def submit(): 22 | print '==\n== [sandbox] Submitting Solutions \n==' 23 | 24 | (login, password) = loginPrompt() 25 | if not login: 26 | print '!! Submission Cancelled' 27 | return 28 | 29 | print '\n== Connecting to Coursera ... ' 30 | 31 | # Part Identifier 32 | (partIdx, sid) = partPrompt() 33 | 34 | # Get Challenge 35 | (login, ch, state, ch_aux) = getChallenge(login, sid) #sid is the "part identifier" 36 | if((not login) or (not ch) or (not state)): 37 | # Some error occured, error string in first return element. 38 | print '\n!! Error: %s\n' % login 39 | return 40 | 41 | # Attempt Submission with Challenge 42 | ch_resp = challengeResponse(login, password, ch) 43 | (result, string) = submitSolution(login, ch_resp, sid, output(partIdx), \ 44 | source(partIdx), state, ch_aux) 45 | 46 | print '== %s' % string.strip() 47 | 48 | 49 | # =========================== LOGIN HELPERS - NO NEED TO CONFIGURE THIS ======================================= 50 | 51 | def loginPrompt(): 52 | """Prompt the user for login credentials. Returns a tuple (login, password).""" 53 | (login, password) = basicPrompt() 54 | return login, password 55 | 56 | 57 | def basicPrompt(): 58 | """Prompt the user for login credentials. Returns a tuple (login, password).""" 59 | login = raw_input('Login (Email address): ') 60 | password = raw_input('One-time Password (from the assignment page. This is NOT your own account\'s password): ') 61 | return login, password 62 | 63 | def partPrompt(): 64 | print 'Hello! 
def getChallenge(email, sid):
    """Request a login challenge from the server for one assignment part.

    Posts the e-mail address and part identifier to challenge_url() and
    parses the pipe-delimited reply. A well-formed reply has exactly nine
    fields; the values of interest are at indices 2, 4, 6 and 8.

    Returns a 4-tuple (email, ch, state, ch_aux). If the reply is malformed,
    returns (error_message, None, None, None) so that the caller's tuple
    unpacking still works and it can report the message held in the first
    element (it treats a missing ch/state as failure).
    """
    url = challenge_url()
    values = {'email_address' : email, 'assignment_part_sid' : sid, 'response_encoding' : 'delim'}
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    response = urllib2.urlopen(req)
    try:
        text = response.read().strip()
    finally:
        # Release the connection even if reading fails.
        response.close()

    splits = text.split('|')
    if len(splits) != 9:
        # Previously this returned a bare None, which made the caller's
        # 4-way unpacking raise TypeError instead of reporting the problem.
        return ('Badly formatted challenge response: %s' % text, None, None, None)
    return (splits[2], splits[4], splits[6], splits[8])
Returns (result, string).""" 107 | source_64_msg = email.message.Message() 108 | source_64_msg.set_payload(source) 109 | email.encoders.encode_base64(source_64_msg) 110 | 111 | output_64_msg = email.message.Message() 112 | output_64_msg.set_payload(output) 113 | email.encoders.encode_base64(output_64_msg) 114 | values = { 'assignment_part_sid' : sid, \ 115 | 'email_address' : email_address, \ 116 | #'submission' : output, \ 117 | 'submission' : output_64_msg.get_payload(), \ 118 | #'submission_aux' : source, \ 119 | 'submission_aux' : source_64_msg.get_payload(), \ 120 | 'challenge_response' : ch_resp, \ 121 | 'state' : state \ 122 | } 123 | url = submit_url() 124 | data = urllib.urlencode(values) 125 | req = urllib2.Request(url, data) 126 | response = urllib2.urlopen(req) 127 | string = response.read().strip() 128 | result = 0 129 | return result, string 130 | 131 | ## This collects the source code (just for logging purposes) 132 | def source(partIdx): 133 | # open the file, get all lines 134 | return "" 135 | 136 | 137 | 138 | ############ BEGIN ASSIGNMENT SPECIFIC CODE - YOU'LL HAVE TO EDIT THIS ############## 139 | 140 | # Make sure you change this string to the last segment of your class URL. 141 | # For example, if your URL is https://class.coursera.org/pgm-2012-001-staging, set it to "pgm-2012-001-staging". 
def output(partIdx):
    """Return the contents of the result file for the given assignment part.

    partIdx is the zero-based part index; the file read is
    "gene_test.p<partIdx + 1>.out" in the current working directory.
    If the file cannot be opened, a message is printed and the script exits.
    """
    filename = "gene_test.p%d.out" % (partIdx + 1)
    try:
        # 'with' guarantees the handle is closed once the text has been read.
        with open(filename) as handle:
            return handle.read()
    except IOError:
        # Only I/O failures are reported as "not found"; other errors
        # (e.g. a non-integer partIdx) are no longer silently swallowed.
        print("File %s not found" % filename)
        sys.exit()
21 | sentences.push("There is a game engine called Unreal Engine"); 22 | answers.push("Unreal Engine is a game engine"); 23 | // should be pushing some expected knowledge structure on here 24 | sentences.push("There is a horse called Matilda"); 25 | answers.push("Matilda is a horse"); 26 | sentences.push("There is a course called ML"); 27 | answers.push("ML is a course"); 28 | //sentences.push("Gandalf is a wizard"); // will require new regex - next step extract existing one 29 | //answers.push("Gandalf is a wizard"); 30 | sentences.push("Unreal Engine has a website http://unrealengine.com"); 31 | answers.push("The website for Unreal Engine is http://unrealengine.com"); 32 | 33 | /* 34 | sentences.push("There is a game engine called Unity3D"); 35 | answers.push("Unity3D is a game engine"); 36 | sentences.push("Unity3D has a URL of http://www.studica.com/unity"); 37 | answers.push("The URL for Unreal Engine is http://www.studica.com/unity"); 38 | sentences.push("Unity3D has a type of integrated"); 39 | answers.push("The type for Unreal Engine is integrated"); 40 | sentences.push("Unity3D has a typeof 3D"); 41 | answers.push("The type for Unity3D is 3D"); 42 | sentences.push("What type of game engine is Unity3D?"); 43 | answers.push("The type for Unity3D is '3D'") 44 | 45 | sentences.push("There is a game engine Crysis"); 46 | answers.push("Crysis is a game engine"); 47 | 48 | sentences.push("There is a game engine Source"); 49 | answers.push("Source is a game engine"); 50 | sentences.push("Source has a URL of http://source.valvesoftware.com/sourcesdk/sourceu.php"); 51 | answers.push("The URL for Source is http://source.valvesoftware.com/sourcesdk/sourceu.php"); 52 | */ 53 | 54 | var checkAnswer = function(i){ 55 | it( "should respond to \""+sentences[i] + "\" with --> \"" + answers[i]+ "\"", function() { 56 | 57 | expect(query(storage,sentences[i])).toEqual(answers[i]); 58 | // ideally we should be checking that data is stored in knowledge base ... 
59 | // and dumping the knowledge base on each test iteration here ... 60 | }); 61 | } 62 | 63 | for (var i in sentences){ 64 | checkAnswer(i); 65 | } 66 | 67 | it("should match entity assertion regex", function() { 68 | // websites have URLs 69 | var result = matchEntityAssertionRegex("There is a robot called Robbie"); 70 | expect(result).toNotEqual(null); 71 | expect(result).toNotEqual(undefined); 72 | expect(result.object).toEqual("robot"); 73 | expect(result.name).toEqual("Robbie"); 74 | }); 75 | 76 | it("should match properties regex", function() { 77 | // websites have URLs 78 | var result = matchPropertiesRegex("Unreal Engine has a website http://unrealengine.com"); 79 | expect(result).toNotEqual(null); 80 | expect(result).toNotEqual(undefined); 81 | expect(result.object).toEqual("Unreal Engine"); 82 | expect(result.relation).toEqual("website"); 83 | expect(result.name).toEqual("http://unrealengine.com"); 84 | }); 85 | 86 | it("should remove punctuation", function() { 87 | expect(removePunctuation("Hello. 
How are you?")).toEqual("Hello How are you"); 88 | }); 89 | 90 | it("should query against a specific storage", function(){ 91 | expect(query(storage,"There is a game engine called Unreal Engine")).toEqual("Unreal Engine is a game engine"); 92 | }); 93 | 94 | it("should respond from database when asked about a one word item", function() { 95 | expect(query(storage,"There is a course called ML")).toEqual("ML is a course"); 96 | expect(query(storage,"What do you know about ML")).toEqual("I know that ML is a course"); 97 | expect(query(storage,"What do you know about ML?")).toEqual("I know that ML is a course"); 98 | }); 99 | 100 | it("should respond from database when asked about a two word item", function() { 101 | expect(query(storage,"There is a game engine called Unreal Engine")).toEqual("Unreal Engine is a game engine"); 102 | expect(query(storage,"Unreal Engine has a website http://unrealengine.com")).toEqual("The website for Unreal Engine is http://unrealengine.com"); 103 | expect(query(storage,"What do you know about Unreal Engine")).toEqual("I know that Unreal Engine is a game engine and website for Unreal Engine is http://unrealengine.com"); 104 | expect(query(storage,"What do you know about Unreal Engine?")).toEqual("I know that Unreal Engine is a game engine and website for Unreal Engine is http://unrealengine.com"); 105 | }); 106 | 107 | it("should be able to handle question based on passed in storage", function() { 108 | expect(query(storage,"There is a course called ML")).toEqual("ML is a course"); 109 | expect(handleQuestion(storage,"What do you know about ML")).toEqual("I know that ML is a course"); 110 | }); 111 | 112 | it("it should be able to handle queries about a single properties", function() { 113 | expect(query(storage,"There is a game engine called Unreal Engine")).toEqual("Unreal Engine is a game engine"); 114 | expect(query(storage,"Unreal Engine has a website http://unrealengine.com")).toEqual("The website for Unreal Engine is 
/**
 * Version: 0.1
 *
 * jQuery plugin exposing a small client for the api.icndb.com joke service
 * under $.icndb. All requests are issued as JSONP GET requests.
 */
(function($) {
  $.icndb = {};
  $.icndb.client = {};
  $.icndb.client.id = 4;
  $.icndb.client.version = 0.1;

  var base = "http://api.icndb.com/";

  /**
   * Returns the full URL of the given resource.
   * Ex.: 'jokes/random/5' -> http://api.icndb.com/jokes/random/5?client=4&clientVersion=0.1
   *
   * @param resource The relative path of the resource, NO LEADING '/'
   */
  var full = function(resource) {
    return base + resource + '?client=' + $.icndb.client.id + '&clientVersion=' + $.icndb.client.version;
  };

  /**
   * Formats a category list as "[a,b,c]", escaping every entry so that
   * reserved characters cannot corrupt the query string.
   *
   * @param categories An array of category names (or a comma separated string)
   */
  var encodeCategories = function(categories) {
    var parts = String(categories).split(",");
    var encoded = [];
    for(var i = 0; i < parts.length; i++) {
      encoded.push(encodeURIComponent(parts[i]));
    }
    return "[" + encoded.join(",") + "]";
  };

  /**
   * Appends the optional firstName / lastName / limitTo / exclude filters
   * found on args to the given URL. exclude is only used when limitTo is
   * not given.
   *
   * @param url  URL that already contains a query string
   * @param args The caller's argument (options object or plain callback)
   */
  var withFilters = function(url, args) {
    if(args.firstName) {
      url += "&firstName=" + encodeURIComponent(args.firstName);
    }
    if(args.lastName) {
      url += "&lastName=" + encodeURIComponent(args.lastName);
    }
    if(args.limitTo) {
      url += "&limitTo=" + encodeCategories(args.limitTo);
    } else if(args.exclude) {
      url += "&exclude=" + encodeCategories(args.exclude);
    }
    return url;
  };

  /**
   * Hands a value to the caller: args may be either an options object with
   * a success function or the success function itself.
   */
  var deliver = function(args, value) {
    if(args.success) {
      args.success(value);
    } else {
      args(value);
    }
  };

  /**
   * Calls the URL via JSONP and passes the parsed response object to
   * successCB.
   *
   * @param destination Location of the destination (String URL)
   * @param successCB(result) Callback on success. Will be called with the response as JS object.
   *        result = {"type": , "value": }
   * @param errorCB [optional] Invoked with jQuery's error arguments if jQuery reports a failure.
   */
  var callServer = function(destination, successCB, errorCB) {
    $.ajax({
      url: destination,
      dataType: "jsonp",
      type: "GET",
      success: function(result) {
        successCB(result);
      },
      error: function() {
        // Previously errorCB was accepted but never used.
        if(typeof errorCB === "function") {
          errorCB.apply(this, arguments);
        }
      }
    });
  };

  /************************************************************************
   * Simple API
   ************************************************************************/

  /**
   * Returns multiple random Chuck Norris jokes to the callback function, optionally with given first name and last name.
   *
   * @param success(jokes: [{id: , joke: }])
   *
   * OR
   *
   * @param {
   *          success: function(jokes: [{id: , joke: }])
   *          number [optional] The number of jokes to retrieve. If not given, 1 joke is retrieved.
   *          firstName [optional] The first name of the main character in the joke.
   *          lastName [optional] The last name of the main character in the joke.
   *          limitTo [optional] An array of categories (Strings) to which the joke may belong.
   *          exclude [optional, only processed if limitTo not given] An array of categories (Strings) to which the joke may not belong.
   *        }
   */
  $.icndb.getRandomJokes = function(args) {
    var success = function(result) {
      // notice: never NoSuchJokeException with random jokes
      deliver(args, result.value);
    };
    var number = 1;
    if(args.number) {
      number = args.number;
    }
    var url = withFilters(full("jokes/random/" + number), args);
    callServer(url, success, function() {} );
  };

  /**
   * Returns a random Chuck Norris joke to the callback function, optionally with given first name and last name.
   * Implemented as getRandomJokes with number = 1; the callback receives the first entry of the result.
   *
   * @param success(joke: {id: , joke: })
   *
   * OR
   *
   * @param {
   *          success: function(joke: {id: , joke: })
   *          firstName [optional] The first name of the main character in the joke.
   *          lastName [optional] The last name of the main character in the joke.
   *          limitTo [optional] An array of categories (Strings) to which the joke may belong.
   *          exclude [optional, only processed if limitTo not given] An array of categories (Strings) to which the joke may not belong.
   *        }
   */
  $.icndb.getRandomJoke = function(args) {
    // Work on a copy so the caller's options object is not modified.
    var args2 = {};
    $.extend(args2, args);
    args2.success = function(result) {
      // notice: never NoSuchJokeException with random jokes
      deliver(args, result[0]);
    };
    args2.number = 1;
    $.icndb.getRandomJokes(args2);
  };

  /**
   * Returns all the jokes in the database.
   *
   * @param success: function(jokes: [{id: , joke: }])
   *
   * OR
   *
   * @param {
   *          success: function(jokes: [{id: , joke: }])
   *          firstName [optional] The first name of the main character in the joke.
   *          lastName [optional] The last name of the main character in the joke.
   *          limitTo [optional] An array of categories (Strings) to which the joke may belong.
   *          exclude [optional, only processed if limitTo not given] An array of categories (Strings) to which the joke may not belong.
   *        }
   */
  $.icndb.getJokes = function(args) {
    var success = function(result) {
      // notice: never NoSuchJokeException when retrieving all jokes
      deliver(args, result.value);
    };
    var url = withFilters(full("jokes"), args);
    callServer(url, success, function() {} );
  };

  /**
   * Returns the categories in the system as an array of strings.
   *
   * @param callback:function(categories:[String])
   */
  $.icndb.getCategories = function(callback) {
    var success = function(result) {
      callback(result.value);
    };
    var url = full("categories");
    callServer(url, success, function() {} );
  };

  /**
   * Returns the number of jokes in the database.
   *
   * @param callback:function(count) Called with the value of the "jokes/count" response.
   */
  $.icndb.getNumberOfJokes = function(callback) {
    var success = function(result) {
      callback(result.value);
    };
    var url = full("jokes/count");
    callServer(url, success, function() {} );
  };
})(jQuery);
color: #b03911; content: "x"; font-weight: bold; margin-left: -1px; } 18 | #HTMLReporter .symbolSummary li.skipped { font-size: 14px; } 19 | #HTMLReporter .symbolSummary li.skipped:before { color: #bababa; content: "\02022"; } 20 | #HTMLReporter .symbolSummary li.pending { line-height: 11px; } 21 | #HTMLReporter .symbolSummary li.pending:before { color: #aaaaaa; content: "-"; } 22 | #HTMLReporter .exceptions { color: #fff; float: right; margin-top: 5px; margin-right: 5px; } 23 | #HTMLReporter .bar { line-height: 28px; font-size: 14px; display: block; color: #eee; } 24 | #HTMLReporter .runningAlert { background-color: #666666; } 25 | #HTMLReporter .skippedAlert { background-color: #aaaaaa; } 26 | #HTMLReporter .skippedAlert:first-child { background-color: #333333; } 27 | #HTMLReporter .skippedAlert:hover { text-decoration: none; color: white; text-decoration: underline; } 28 | #HTMLReporter .passingAlert { background-color: #a6b779; } 29 | #HTMLReporter .passingAlert:first-child { background-color: #5e7d00; } 30 | #HTMLReporter .failingAlert { background-color: #cf867e; } 31 | #HTMLReporter .failingAlert:first-child { background-color: #b03911; } 32 | #HTMLReporter .results { margin-top: 14px; } 33 | #HTMLReporter #details { display: none; } 34 | #HTMLReporter .resultsMenu, #HTMLReporter .resultsMenu a { background-color: #fff; color: #333333; } 35 | #HTMLReporter.showDetails .summaryMenuItem { font-weight: normal; text-decoration: inherit; } 36 | #HTMLReporter.showDetails .summaryMenuItem:hover { text-decoration: underline; } 37 | #HTMLReporter.showDetails .detailsMenuItem { font-weight: bold; text-decoration: underline; } 38 | #HTMLReporter.showDetails .summary { display: none; } 39 | #HTMLReporter.showDetails #details { display: block; } 40 | #HTMLReporter .summaryMenuItem { font-weight: bold; text-decoration: underline; } 41 | #HTMLReporter .summary { margin-top: 14px; } 42 | #HTMLReporter .summary .suite .suite, #HTMLReporter .summary .specSummary { 
margin-left: 14px; } 43 | #HTMLReporter .summary .specSummary.passed a { color: #5e7d00; } 44 | #HTMLReporter .summary .specSummary.failed a { color: #b03911; } 45 | #HTMLReporter .description + .suite { margin-top: 0; } 46 | #HTMLReporter .suite { margin-top: 14px; } 47 | #HTMLReporter .suite a { color: #333333; } 48 | #HTMLReporter #details .specDetail { margin-bottom: 28px; } 49 | #HTMLReporter #details .specDetail .description { display: block; color: white; background-color: #b03911; } 50 | #HTMLReporter .resultMessage { padding-top: 14px; color: #333333; } 51 | #HTMLReporter .resultMessage span.result { display: block; } 52 | #HTMLReporter .stackTrace { margin: 5px 0 0 0; max-height: 224px; overflow: auto; line-height: 18px; color: #666666; border: 1px solid #ddd; background: white; white-space: pre; } 53 | 54 | #TrivialReporter { padding: 8px 13px; position: absolute; top: 0; bottom: 0; left: 0; right: 0; overflow-y: scroll; background-color: white; font-family: "Helvetica Neue Light", "Lucida Grande", "Calibri", "Arial", sans-serif; /*.resultMessage {*/ /*white-space: pre;*/ /*}*/ } 55 | #TrivialReporter a:visited, #TrivialReporter a { color: #303; } 56 | #TrivialReporter a:hover, #TrivialReporter a:active { color: blue; } 57 | #TrivialReporter .run_spec { float: right; padding-right: 5px; font-size: .8em; text-decoration: none; } 58 | #TrivialReporter .banner { color: #303; background-color: #fef; padding: 5px; } 59 | #TrivialReporter .logo { float: left; font-size: 1.1em; padding-left: 5px; } 60 | #TrivialReporter .logo .version { font-size: .6em; padding-left: 1em; } 61 | #TrivialReporter .runner.running { background-color: yellow; } 62 | #TrivialReporter .options { text-align: right; font-size: .8em; } 63 | #TrivialReporter .suite { border: 1px outset gray; margin: 5px 0; padding-left: 1em; } 64 | #TrivialReporter .suite .suite { margin: 5px; } 65 | #TrivialReporter .suite.passed { background-color: #dfd; } 66 | #TrivialReporter .suite.failed { 
background-color: #fdd; } 67 | #TrivialReporter .spec { margin: 5px; padding-left: 1em; clear: both; } 68 | #TrivialReporter .spec.failed, #TrivialReporter .spec.passed, #TrivialReporter .spec.skipped { padding-bottom: 5px; border: 1px solid gray; } 69 | #TrivialReporter .spec.failed { background-color: #fbb; border-color: red; } 70 | #TrivialReporter .spec.passed { background-color: #bfb; border-color: green; } 71 | #TrivialReporter .spec.skipped { background-color: #bbb; } 72 | #TrivialReporter .messages { border-left: 1px dashed gray; padding-left: 1em; padding-right: 1em; } 73 | #TrivialReporter .passed { background-color: #cfc; display: none; } 74 | #TrivialReporter .failed { background-color: #fbb; } 75 | #TrivialReporter .skipped { color: #777; background-color: #eee; display: none; } 76 | #TrivialReporter .resultMessage span.result { display: block; line-height: 2em; color: black; } 77 | #TrivialReporter .resultMessage .mismatch { color: black; } 78 | #TrivialReporter .stackTrace { white-space: pre; font-size: .8em; margin-left: 10px; max-height: 5em; overflow: auto; border: 1px inset red; padding: 1em; background: #eef; } 79 | #TrivialReporter .finished-at { padding-left: 1em; font-size: .6em; } 80 | #TrivialReporter.show-passed .passed, #TrivialReporter.show-skipped .skipped { display: block; } 81 | #TrivialReporter #jasmine_content { position: fixed; right: 100%; } 82 | #TrivialReporter .runner { border: 1px solid gray; display: block; margin: 5px 0; padding: 2px 0 2px 10px; } 83 | -------------------------------------------------------------------------------- /storage.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Get the storage object to use 3 | */ 4 | if (typeof(Storage) != "undefined") { 5 | // Yay, we have HTML5 local storage 6 | 7 | // add methods to allow storage of general objects (storing anything 8 | // other than strings may not work in all browsers) 9 | Storage.prototype.getObject = 
/**
 * Holds the chatbot's state: a databank with the knowledge base, the
 * conversation transcript, and a key/value backend used by load()/save().
 *
 * @param db      databank as produced by createDatabank()
 * @param backend object providing getItem/setItem/getObject/setObject
 */
function ChatbotStorage(db, backend) {
  this.databank = db;
  this.backend = backend;
  this.clearTranscript();
}

/**
 * Turns a display name into the identifier form used in the databank by
 * replacing EVERY space with an underscore (the former replace(' ','_')
 * only replaced the first one, so names with three or more words produced
 * identifiers that still contained spaces).
 */
ChatbotStorage.toIdentifier = function(text) {
  return text.replace(/ /g, '_');
};

/** Returns the databank currently in use. */
ChatbotStorage.prototype.getDatabank = function() {
  return this.databank;
};

/** Returns the transcript: an array of {timestamp, actor, text} entries. */
ChatbotStorage.prototype.getTranscript = function() {
  return this.transcript;
};

/** Returns whatever the backend holds under the 'rdf' key. */
ChatbotStorage.prototype.getKnowledgeBaseAsText = function() {
  return this.backend.getItem('rdf');
};

/**
 * True when the backend holds no 'rdf' entry at all (null or undefined).
 * Note: an entry holding the empty string, as written by clearDatabank(),
 * does not count as empty here.
 */
ChatbotStorage.prototype.isEmpty = function() {
  return this.backend.getItem('rdf') == null;
};

/** Replaces the databank with a fresh one and blanks the stored 'rdf' text. */
ChatbotStorage.prototype.clearDatabank = function() {
  this.databank = createDatabank();
  this.backend.setItem("rdf", "");
};

/**
 * Records that `name` is an instance of `object` and stores its name.
 * The identifier form of the name is used for both triples.
 */
ChatbotStorage.prototype.storeEntity = function(object, name) {
  name = ChatbotStorage.toIdentifier(name);
  this.getDatabank()
    .add(stringToResource(name) + ' a ' + quote(object))
    .add(stringToResource(name) + ' foaf:name ' + quote(name));
};

/** Records `object sam:<relation> "name"` in the databank. */
ChatbotStorage.prototype.storeProperty = function(object, relation, name) {
  object = ChatbotStorage.toIdentifier(object);
  this.getDatabank()
    .add(stringToResource(object) + ' sam:' + relation + ' ' + quote(name));
};

/**
 * Looks up a single sam:<relation> value of `object`.
 *
 * @return {value: ...} for the first match, or undefined when nothing matches
 */
ChatbotStorage.prototype.queryProperty = function(object, relation) {
  object = ChatbotStorage.toIdentifier(object);
  var raw = $.rdf({databank: this.getDatabank()})
    .where('_:' + object + ' sam:' + relation + ' ?value')
    .select(['value'])[0];
  if (raw === undefined) {
    return undefined;
  }
  return { value: raw.value.value };
};

/**
 * Returns every relation/value pair known about `object` as an array of
 * {name, relation} objects (name is the value, relation is the path of the
 * relation URI without its first character).
 */
ChatbotStorage.prototype.queryAllProperties = function(object) {
  object = ChatbotStorage.toIdentifier(object);
  var results = $.rdf({databank: this.getDatabank()})
    .where('_:' + object + ' ?relation ?value')
    .select(['relation', 'value']);
  var response = [];
  // Index loop rather than for...in, so only the array elements are visited.
  for (var i = 0; i < results.length; i++) {
    response.push({
      'name': results[i].value.value,
      'relation': results[i].relation.value.path.substring(1)
    });
  }
  return response;
};

/**
 * Looks up the type recorded for `name`.
 *
 * @return {type: ...} or undefined when the entity is unknown
 */
ChatbotStorage.prototype.queryEntity = function(name) {
  // Same identifier normalisation as the other queries; a no-op for names
  // that are already in identifier form.
  name = ChatbotStorage.toIdentifier(name);
  var raw = $.rdf({databank: this.getDatabank()})
    .where('_:' + name + ' a ?type')
    .select(['type'])[0];
  if (raw === undefined) {
    return undefined;
  }
  // trimming because databank seems to introduce trailing space into the value
  // TODO contact the rdf project people to let them know
  var value = raw.type.value || "";
  return { type: value.trim() };
};

/** Empties the transcript. */
ChatbotStorage.prototype.clearTranscript = function() {
  this.transcript = [ ];
};

/** Appends a timestamped entry (who said what) to the transcript. */
ChatbotStorage.prototype.addToTranscript = function(who, what) {
  var entry = { timestamp: new Date(), actor: who, text: what };
  this.transcript.push(entry);
};

/** Loads Turtle text straight into the current databank. */
ChatbotStorage.prototype.loadKnowledgeBaseFromString = function(turtle) {
  this.databank.load(turtle, { format: 'text/turtle'});
};

/**
 * Restores the knowledge base and the transcript from the backend.
 * Missing entries (null OR undefined, depending on the backend) and an
 * empty knowledge base are skipped instead of raising an error.
 */
ChatbotStorage.prototype.load = function() {
  var turtle = this.getKnowledgeBaseAsText();
  if (turtle != null) {
    // trim any whitespace
    turtle = trim1(String(turtle));

    // trim any surrounding double quotes; only the quote characters are
    // removed (the former substring(1, length-2) also cut off the last
    // character of the actual content)
    if (turtle.charAt(0) === '"') {
      turtle = turtle.substring(1);
      if (turtle.charAt(turtle.length - 1) === '"') {
        turtle = turtle.substring(0, turtle.length - 1);
      }
    }
    if (turtle !== '') {
      this.databank.load(turtle, { format: 'text/turtle'});
    }
  }

  var ts = this.backend.getObject("transcript");
  if (ts != null) {
    this.transcript = ts;
  }
};

/** Writes the databank (as Turtle text) and the transcript to the backend. */
ChatbotStorage.prototype.save = function() {
  var turtle = this.databank.dump({ format: 'text/turtle'});
  this.backend.setItem("rdf", turtle);
  this.backend.setObject("transcript", this.transcript);
};
/*
 * Fallback class to give us the illusion of storage if HTML5 storage is
 * not available - works until we refresh or leave the page.
 *
 * Exposes the same four methods as LocalStorage (getItem, setItem,
 * getObject, setObject) but keeps everything in an in-memory map.
 */
function TransientStorage() {
    // Prototype-less map: with a plain {} a lookup of "constructor" or
    // "toString" would return an inherited Object.prototype member.
    this.store = Object.create(null);
}

/*
 * Returns the value stored under `key`, or null when nothing is stored.
 * localStorage.getItem reports a missing key as null and
 * ChatbotStorage.load tests for exactly that, so the fallback does the same.
 */
TransientStorage.prototype.getItem = function(key) {
    var value = this.store[key];
    return value === undefined ? null : value;
}

/* Stores `value` under `key` as-is (no string conversion). */
TransientStorage.prototype.setItem = function(key, value) {
    this.store[key] = value;
}

/* Same as getItem: values are kept by reference, so nothing needs decoding. */
TransientStorage.prototype.getObject = function(key) {
    return this.getItem(key);
}

/* Same as setItem: values are kept by reference, so nothing needs encoding. */
TransientStorage.prototype.setObject = function(key, value) {
    this.setItem(key, value);
}
class Hmm(object):
    """
    Stores counts for n-grams and emissions.

    Attributes:
        n: order of the largest n-gram that is counted.
        emission_counts: maps (word, ne_tag) to its count.
        ngram_counts: list of n dicts; ngram_counts[k-1] maps a k-tuple of
            tags to its count.
        all_states: set of tags seen by read_counts.
    """

    def __init__(self, n=3):
        assert n>=2, "Expecting n>=2."
        self.n = n
        self.emission_counts = defaultdict(int)
        # range() instead of xrange() so the class runs on Python 2 and 3.
        self.ngram_counts = [defaultdict(int) for i in range(self.n)]
        self.all_states = set()

    def train(self, corpus_file):
        """
        Count n-gram frequencies and emission probabilities from a corpus file.
        """
        ngram_iterator = \
            get_ngrams(sentence_iterator(simple_conll_corpus_iterator(corpus_file)), self.n)

        for ngram in ngram_iterator:
            # Sanity check: n-gram we get from the corpus stream needs to have the right length.
            # The message previously called len(ngram, self.n), which raised a
            # TypeError and hid the real assertion failure.
            assert len(ngram) == self.n, "ngram in stream is %i, expected %i" % (len(ngram), self.n)

            tagsonly = tuple([ne_tag for word, ne_tag in ngram]) # retrieve only the tags
            for i in range(2, self.n+1): # Count NE-tag 2-grams..n-grams
                self.ngram_counts[i-1][tagsonly[-i:]] += 1

            if ngram[-1][0] is not None: # If this is not the STOP token closing a sentence
                self.ngram_counts[0][tagsonly[-1:]] += 1 # count 1-gram
                self.emission_counts[ngram[-1]] += 1 # and emission frequencies

            # Need to count a single n-1-gram of sentence start symbols per sentence
            if ngram[-2][0] is None: # this is the first n-gram in a sentence
                self.ngram_counts[self.n - 2][tuple((self.n - 1) * ["*"])] += 1

    def write_counts(self, output, printngrams=(1,2,3)):
        """
        Writes counts to the output file object.
        Format, one record per line:
            "count WORDTAG ne_tag word"   for every emission count
            "count n-GRAM tag1 ... tagn"  for every n-gram count, n in printngrams
        """
        # First write counts for emissions
        for word, ne_tag in self.emission_counts:
            output.write("%i WORDTAG %s %s\n" % (self.emission_counts[(word, ne_tag)], ne_tag, word))

        # Then write counts for all ngrams
        for n in printngrams:
            for ngram in self.ngram_counts[n-1]:
                ngramstr = " ".join(ngram)
                output.write("%i %i-GRAM %s\n" %(self.ngram_counts[n-1][ngram], n, ngramstr))

    def read_counts(self, corpusfile):
        """
        Replaces all counts with those read from a file written by
        write_counts. The model is reset to a trigram model (n = 3) first.
        Counts are stored as floats.
        """
        self.n = 3
        self.emission_counts = defaultdict(int)
        self.ngram_counts = [defaultdict(int) for i in range(self.n)]
        self.all_states = set()

        for line in corpusfile:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on float('').
                continue
            parts = line.split(" ")
            count = float(parts[0])
            if parts[1] == "WORDTAG":
                ne_tag = parts[2]
                # The corpus reader joins all but the last field into the
                # word, so a word may contain spaces; keep all of it.
                word = " ".join(parts[3:])
                self.emission_counts[(word, ne_tag)] = count
                self.all_states.add(ne_tag)
            elif parts[1].endswith("GRAM"):
                n = int(parts[1].replace("-GRAM",""))
                ngram = tuple(parts[2:])
                self.ngram_counts[n-1][ngram] = count
/*
 * String method to remove stop words
 * Written by GeekLad http://geeklad.com
 * Stop words obtained from http://www.lextek.com/manuals/onix/stopwords1.html
 * Usage: string_variable.removeStopWords();
 * Output: The original String with stop words removed
 *
 * A token counts as a stop word when, after dropping every non-letter and
 * lower-casing, it equals a list entry. Only whitespace-delimited occurrences
 * are removed from the string (case-insensitively), so "the," is left alone.
 * Leading and trailing whitespace is trimmed from the result.
 */
String.prototype.removeStopWords = function() {
    var cleansed_string = this.valueOf();
    var stop_words = [
        'a', 'about', 'above', 'across', 'after', 'again', 'against', 'all',
        'almost', 'alone', 'along', 'already', 'also', 'although', 'always',
        'among', 'an', 'and', 'another', 'any', 'anybody', 'anyone',
        'anything', 'anywhere', 'are', 'area', 'areas', 'around', 'as', 'ask',
        'asked', 'asking', 'asks', 'at', 'away', 'b', 'back', 'backed',
        'backing', 'backs', 'be', 'became', 'because', 'become', 'becomes',
        'been', 'before', 'began', 'behind', 'being', 'beings', 'best',
        'better', 'between', 'big', 'both', 'but', 'by', 'c', 'came', 'can',
        'cannot', 'case', 'cases', 'certain', 'certainly', 'clear', 'clearly',
        'come', 'could', 'd', 'did', 'differ', 'different', 'differently',
        'do', 'does', 'done', 'down', 'downed', 'downing', 'downs', 'during',
        'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends',
        'enough', 'even', 'evenly', 'ever', 'every', 'everybody', 'everyone',
        'everything', 'everywhere', 'f', 'face', 'faces', 'fact', 'facts',
        'far', 'felt', 'few', 'find', 'finds', 'first', 'for', 'four', 'from',
        'full', 'fully', 'further', 'furthered', 'furthering', 'furthers',
        'g', 'gave', 'general', 'generally', 'get', 'gets', 'give', 'given',
        'gives', 'go', 'going', 'good', 'goods', 'got', 'great', 'greater',
        'greatest', 'group', 'grouped', 'grouping', 'groups', 'h', 'had',
        'has', 'have', 'having', 'he', 'her', 'here', 'herself', 'high',
        'higher', 'highest', 'him', 'himself', 'his', 'how', 'however', 'i',
        'if', 'important', 'in', 'interest', 'interested', 'interesting',
        'interests', 'into', 'is', 'it', 'its', 'itself', 'j', 'just', 'k',
        'keep', 'keeps', 'kind', 'knew', 'know', 'known', 'knows', 'l',
        'large', 'largely', 'last', 'later', 'latest', 'least', 'less', 'let',
        'lets', 'like', 'likely', 'long', 'longer', 'longest', 'm', 'made',
        'make', 'making', 'man', 'many', 'may', 'me', 'member', 'members',
        'men', 'might', 'more', 'most', 'mostly', 'mr', 'mrs', 'much', 'must',
        'my', 'myself', 'n', 'necessary', 'need', 'needed', 'needing',
        'needs', 'never', 'new', 'newer', 'newest', 'next', 'no', 'nobody',
        'non', 'noone', 'not', 'nothing', 'now', 'nowhere', 'number',
        'numbers', 'o', 'of', 'off', 'often', 'old', 'older', 'oldest', 'on',
        'once', 'one', 'only', 'open', 'opened', 'opening', 'opens', 'or',
        'order', 'ordered', 'ordering', 'orders', 'other', 'others', 'our',
        'out', 'over', 'p', 'part', 'parted', 'parting', 'parts', 'per',
        'perhaps', 'place', 'places', 'point', 'pointed', 'pointing',
        'points', 'possible', 'present', 'presented', 'presenting',
        'presents', 'problem', 'problems', 'put', 'puts', 'q', 'quite', 'r',
        'rather', 'really', 'right', 'room', 'rooms', 's', 'said', 'same',
        'saw', 'say', 'says', 'second', 'seconds', 'see', 'seem', 'seemed',
        'seeming', 'seems', 'sees', 'several', 'shall', 'she', 'should',
        'show', 'showed', 'showing', 'shows', 'side', 'sides', 'since',
        'small', 'smaller', 'smallest', 'so', 'some', 'somebody', 'someone',
        'something', 'somewhere', 'state', 'states', 'still', 'such', 'sure',
        't', 'take', 'taken', 'than', 'that', 'the', 'their', 'them', 'then',
        'there', 'therefore', 'these', 'they', 'thing', 'things', 'think',
        'thinks', 'this', 'those', 'though', 'thought', 'thoughts', 'three',
        'through', 'thus', 'to', 'today', 'together', 'too', 'took', 'toward',
        'turn', 'turned', 'turning', 'turns', 'two', 'u', 'under', 'until',
        'up', 'upon', 'us', 'use', 'used', 'uses', 'v', 'very', 'w', 'want',
        'wanted', 'wanting', 'wants', 'was', 'way', 'ways', 'we', 'well',
        'wells', 'went', 'were', 'what', 'when', 'where', 'whether', 'which',
        'while', 'who', 'whole', 'whose', 'why', 'will', 'with', 'within',
        'without', 'work', 'worked', 'working', 'works', 'would', 'x', 'y',
        'year', 'years', 'yet', 'you', 'young', 'younger', 'youngest', 'your',
        'yours', 'z'
    ];

    // Build a lookup table once so each token costs one property test
    // instead of a scan over the whole list.
    var is_stop_word = {};
    for (var s = 0; s < stop_words.length; s++) {
        is_stop_word[stop_words[s]] = true;
    }

    // Split out all the individual words in the phrase. match() returns null
    // for an empty or whitespace-only string, so fall back to an empty list.
    // `words` is declared locally; it used to leak as an implicit global.
    var words = cleansed_string.match(/[^\s]+|\s+[^\s+]$/g) || [];

    // Review all the words
    for (var x = 0; x < words.length; x++) {
        // Trim the word, remove non-alpha characters and normalise case
        var word = words[x].replace(/\s+|[^a-z]+/ig, "").toLowerCase();

        // Own-property test so tokens like "constructor" are not mistaken
        // for stop words.
        if (!Object.prototype.hasOwnProperty.call(is_stop_word, word)) {
            continue;
        }

        // Build the regex
        var regex_str = "^\\s*"+word+"\\s*$";      // Only word
        regex_str += "|^\\s*"+word+"\\s+";         // First word
        regex_str += "|\\s+"+word+"\\s*$";         // Last word
        regex_str += "|\\s+"+word+"\\s+";          // Word somewhere in the middle
        var regex = new RegExp(regex_str, "ig");

        // Remove the word from the keywords. One pass runs per matching
        // token, which also catches adjacent occurrences that share their
        // separating whitespace.
        cleansed_string = cleansed_string.replace(regex, " ");
    }
    return cleansed_string.replace(/^\s+|\s+$/g, "");
}
/*
 * Counts word/tag emissions and tag 1-, 2- and 3-grams in training data.
 *
 * `data` holds one "word tag" pair per line (e.g. "Comparison O"); a line
 * whose first token is empty is treated as a sentence break and counted as
 * the tag 'STOP'. Tag history starts as '*', '*' and is reset after every
 * STOP.
 *
 * Relies on the project's Hash class (defined outside this function) through
 * `new Hash(initial, 0)`, `get(keyPath)` and `set(keyPath, value)`.
 * NOTE(review): the second constructor argument is presumably the value
 * returned for missing keys - confirm in hash.js.
 *
 * @param {string} data  Training data, lines separated by '\n'.
 * @returns {{grams: Hash, word_tags: Hash}} `grams` is keyed by
 *          [n, tag1..tagn]; `word_tags` is keyed by [word, tag].
 */
function count(data){
    var word_tags = new Hash({},0);
    var grams = new Hash({1:{},2:{},3:{}},0);
    var lines = data.split('\n');
    var category_minus_one = '*';
    var category_minus_two = '*';

    // Adds one to the counter stored at `keys` in `hash`.
    function increment(hash, keys){
        hash.set(keys, hash.get(keys)+1);
    }

    // Index loop rather than for-in, which would also visit enumerable
    // properties added to Array.prototype.
    for (var i = 0; i < lines.length; i++) {
        // `tokens` is declared locally; it used to be an implicit global,
        // which throws a ReferenceError in strict mode.
        var tokens = lines[i].split(' ');
        var word = tokens[0];
        var category = tokens[1]; // e.g. 'O' or 'I-GENE'
        if (word === '') { // is this our sentence break identifier
            category = 'STOP';
        }
        else {
            increment(word_tags, [word,category]);
        }
        increment(grams, [1,category]);
        increment(grams, [2,category_minus_one,category]);
        increment(grams, [3,category_minus_two,category_minus_one,category]);

        if (category === 'STOP') {
            category_minus_one = '*';
            category_minus_two = '*';
            // HACK kept from the original: the ('*','*') bigram is counted
            // after each sentence break rather than at each sentence start.
            increment(grams, [2,category_minus_two,category_minus_one]);
        }
        else {
            category_minus_two = category_minus_one;
            category_minus_one = category;
        }
    }
    return {'grams':grams, 'word_tags':word_tags};
}
/*
 * Tags each word of `devData` with the category that has the highest
 * emission ratio count(word, category) / count(category).
 *
 * @param {string} devData     One word per line; empty lines are kept as-is.
 * @param {Object} result      Counts with `word_tags` and `grams` members,
 *                             each offering get(keyPath) (as returned by
 *                             count / rarify).
 * @param {string} rareSymbol  Word whose counts are used for any word that
 *                             word_tags reports as 0 (unseen).
 * @returns {string} The input with " <category>" appended to every
 *                   non-empty line, trimmed of surrounding whitespace.
 */
function tag(devData, result, rareSymbol){
    var lines = devData.split('\n');
    var word_tags = result.word_tags;
    var grams = result.grams;
    // Index loop rather than for-in, which would also visit enumerable
    // properties added to Array.prototype.
    for (var i = 0; i < lines.length; i++) {
        var word = lines[i];
        if (word === '') {
            continue;
        }
        // if we haven't encountered word we need to assign using _RARE_
        if (word_tags.get([word]) === 0) {
            word = rareSymbol;
        }
        var highest = 0;
        var output = '';
        var categories = word_tags.get([word]);
        for (var category in categories) {
            var denominator = grams.get([1,category]);
            // Same guard as emission(): a category with no unigram count
            // scores 0 instead of Infinity/NaN. The local is no longer called
            // `emission`, which shadowed that function.
            var probability = denominator ? word_tags.get([word,category])/denominator : 0;
            if (probability > highest) {
                highest = probability;
                output = category;
            }
        }
        // trim() removes the trailing space left when no category was found
        lines[i] = (lines[i]+' '+output).trim();
    }
    return lines.join('\n').trim();
}
22 | // a better test would use a lower threshold 23 | var gene_key_head_direct = "BACKGROUND I-GENE\n\ 24 | : I-GENE\n\ 25 | Ischemic I-GENE\n\ 26 | heart I-GENE\n\ 27 | disease I-GENE\n\ 28 | is I-GENE\n\ 29 | the I-GENE\n\ 30 | primary I-GENE\n\ 31 | cause I-GENE\n\ 32 | of O\n"; 33 | 34 | // what's the approach to test ajax calls in jasmine 35 | //$.get('http://127.0.0.1/~sam/Github/faqbot/h1-p/gene.train.head', function(data) { 36 | // so issue here is that we'd be quite happy to block waiting for this data 37 | // having pulled it in a single time ... 38 | // TODO make sure this blocks before we get to specs 39 | //gene_train_head = data; 40 | 41 | //}, "text"); 42 | 43 | describe("counting", function () { 44 | var callback,trainingData,devData,keyData; 45 | 46 | beforeEach(function() { 47 | callback = jasmine.createSpy(); 48 | callAjax(callback,'gene.train.head2'); 49 | waitsFor(function() { 50 | return callback.callCount > 0; 51 | }); 52 | runs(function() { 53 | trainingData = callback.mostRecentCall.args[0]; 54 | }); 55 | 56 | callback2 = jasmine.createSpy(); 57 | callAjax(callback2,'gene.dev.head'); 58 | waitsFor(function() { 59 | return callback2.callCount > 0; 60 | }); 61 | runs(function() { 62 | devData = callback2.mostRecentCall.args[0]; 63 | }); 64 | 65 | callback3 = jasmine.createSpy(); 66 | callAjax(callback3,'gene.key.head'); 67 | waitsFor(function() { 68 | return callback3.callCount > 0; 69 | }); 70 | runs(function() { 71 | keyData = callback3.mostRecentCall.args[0]; 72 | }); 73 | 74 | }); 75 | 76 | it("should be able to generate the correct frequency counts", function() { 77 | var result = count(trainingData); 78 | var word_tags = result.word_tags; 79 | //debugger 80 | // this is a subset of the correct counts ... 
81 | expect(word_tags.get(['Comparison','O'])).toEqual(2); 82 | expect(word_tags.get(['Pharmacologic','O'])).toEqual(1); 83 | expect(word_tags.get(['and','O'])).toEqual(1); 84 | expect(word_tags.get(['with','O'])).toEqual(1); 85 | expect(word_tags.get(['alkaline','I-GENE'])).toEqual(1); 86 | expect(word_tags.get(['phosphatases','I-GENE'])).toEqual(1); 87 | expect(word_tags.get(['5','I-GENE'])).toEqual(1); 88 | expect(word_tags.get(['-','I-GENE'])).toEqual(1); 89 | expect(word_tags.get(['nucleotidase','I-GENE'])).toEqual(1); 90 | 91 | // note the above is checking the old small training set, but we we are pulling in a bigger chunk now 92 | 93 | var grams = result.grams; 94 | expect(grams.get(['1','O'])).toEqual(43); 95 | expect(grams.get(['1','I-GENE'])).toEqual(5); 96 | expect(grams.get(['2','*','*'])).toEqual(3); 97 | expect(grams.get(['2','*','O'])).toEqual(3); 98 | expect(grams.get(['2','*','I-GENE'])).toEqual(0); 99 | expect(grams.get(['2','O','I-GENE'])).toEqual(2); 100 | expect(grams.get(['2','O','O'])).toEqual(39); 101 | expect(grams.get(['2','I-GENE','O'])).toEqual(1); 102 | expect(grams.get(['2','I-GENE','I-GENE'])).toEqual(3); 103 | expect(grams.get(['3','*','*','*'])).toEqual(0); 104 | expect(grams.get(['3','*','*','O'])).toEqual(3); 105 | expect(grams.get(['3','*','O','O'])).toEqual(3); 106 | expect(grams.get(['3','O','O','O'])).toEqual(36); 107 | }); 108 | 109 | it("should be able to generate the correct frequency counts with infrequent cutoff", function() { 110 | var result = rarify(count(trainingData),'_RARE_',5); 111 | var word_tags = result.word_tags; 112 | //debugger 113 | // this is a subset of the correct counts ... 
114 | expect(word_tags.get(['Comparison'])).toEqual(0); 115 | expect(word_tags.get(['Pharmacologic'])).toEqual(0); 116 | expect(word_tags.get(['and'])).toEqual(0); 117 | expect(word_tags.get(['with'])).toEqual(0); 118 | expect(word_tags.get(['alkaline'])).toEqual(0); 119 | expect(word_tags.get(['phosphatases'])).toEqual(0); 120 | expect(word_tags.get(['5'])).toEqual(0); 121 | expect(word_tags.get(['-'])).toEqual(0); 122 | expect(word_tags.get(['nucleotidase'])).toEqual(0); 123 | expect(word_tags.get(['_RARE_','O'])).toEqual(43); 124 | expect(word_tags.get(['_RARE_','I-GENE'])).toEqual(5); 125 | // should also be checking that words are deleted? or actually 126 | // what is the behaviour we want here? 127 | 128 | var grams = result.grams; 129 | expect(grams.get(['1','O'])).toEqual(43); 130 | expect(grams.get(['1','I-GENE'])).toEqual(5); 131 | }); 132 | 133 | it("should be able to read in the dev file and tag it", function() { 134 | var c = count(trainingData); 135 | var result = rarify(c,rareKeyword,2); 136 | expect(tag(devData, result, rareKeyword)).toEqual(gene_key_head_direct.trim()); 137 | }); 138 | 139 | it("should be able to calculate HMM Conditional Trigram probabilities", function() { 140 | grams = new Hash({1:{},2:{},3:{}},0); 141 | grams.set(['3','*','*','O'],0.0); 142 | grams.set(['2','*','*'],0.0); 143 | expect(conditionalTrigramProbability('O','*','*',grams)).toEqual(0); 144 | grams.set(['3','*','*','O'],0.1); 145 | grams.set(['2','*','*'],0.5); 146 | expect(conditionalTrigramProbability('O','*','*',grams)).toEqual(0.2); 147 | }); 148 | 149 | it("should be able to calculate HMM Trigram probabilities given a count object", function() { 150 | var c = count(trainingData); 151 | var grams = c.grams; 152 | expect(conditionalTrigramProbability('O','*','*',grams)).toEqual(grams.get(['3','*','*','O'])/grams.get(['2','*','*'])); 153 | expect(conditionalTrigramProbability('I-GENE','*','*',grams)).toEqual(0); 154 | 
expect(conditionalTrigramProbability('I-GENE','I-GENE','*',grams)).toEqual(0); 155 | expect(conditionalTrigramProbability('STOP','I-GENE','I-GENE',grams)).toEqual(1/3); 156 | }); 157 | 158 | it("should be able to calculate emission probabilities", function() { 159 | var c = count(trainingData); 160 | var grams = c.grams; 161 | var word_tags = c.word_tags; 162 | expect(emission('Comparison','O', word_tags, grams)).toEqual(2/43); 163 | expect(emission('Blah','O', word_tags, grams)).toEqual(0); 164 | expect(emission('alkaline','I-GENE', word_tags, grams)).toEqual(0.2); 165 | expect(emission('Comparison','I-GENE', word_tags, grams)).toEqual(0); 166 | expect(emission('alkaline','O', word_tags, grams)).toEqual(0); 167 | // TODO would be good to be checking for divide by zero 168 | grams.set(['1','O'],0); 169 | expect(emission('Comparison','O', word_tags, grams)).toEqual(0); 170 | }); 171 | 172 | it("should be able to compute the viterbi algorithm", function() { 173 | var c = count(trainingData); 174 | var result = rarify(c,rareKeyword,2); 175 | var result2 = viterbi("Comparison with alkaline",result); 176 | expect(result2.tag_sequence).toEqual({0:'O',1:'O',2:'O'}); 177 | expect(result2.max).toEqual(0.001296748609757334); 178 | // not sure if the above are actually correct, but they are at least sensible 179 | // Next step is to see what tagging we get on the gene.dev set 180 | 181 | }); 182 | 183 | it("should be able to get appropriate sets for possible tags at each position in a sentence", function() { 184 | expect(getSet(-1)).toEqual({'*':undefined}); 185 | expect(getSet(0)).toEqual({'*':undefined}); 186 | expect(getSet(1)).toEqual({'O':undefined,'I-GENE':undefined,'STOP':undefined}); 187 | expect(getSet(100)).toEqual({'O':undefined,'I-GENE':undefined,'STOP':undefined}); 188 | 189 | }); 190 | 191 | }); 192 | 193 | 194 | describe("ajax", function () { 195 | var callback,data; 196 | 197 | beforeEach(function() { 198 | callback = jasmine.createSpy(); 199 | 
callAjax(callback,'gene.train.head'); 200 | waitsFor(function() { 201 | return callback.callCount > 0; 202 | }); 203 | runs(function() { 204 | data = callback.mostRecentCall.args[0]; 205 | }); 206 | }); 207 | 208 | it("should make a real AJAX request", function () { 209 | runs(function() { 210 | expect(callback.mostRecentCall.args[0]).toEqual(gene_train_head_direct); 211 | }); 212 | }); 213 | }); 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | }); 223 | -------------------------------------------------------------------------------- /thirdparty/jquery.uri.js: -------------------------------------------------------------------------------- 1 | /* 2 | * $ URIs @VERSION 3 | * 4 | * Copyright (c) 2008,2009 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | */ 8 | /** 9 | * @fileOverview $ URIs 10 | * @author Jeni Tennison 11 | * @copyright (c) 2008,2009 Jeni Tennison 12 | * @license MIT license (MIT-LICENSE.txt) 13 | * @version 1.0 14 | */ 15 | /** 16 | * @class 17 | * @name jQuery 18 | * @exports $ as jQuery 19 | * @description rdfQuery is a jQuery plugin. The only fields and methods listed here are those that come as part of the rdfQuery library. 20 | */ 21 | (function ($) { 22 | 23 | var 24 | mem = {}, 25 | uriRegex = /^(([a-z][\-a-z0-9+\.]*):)?(\/\/([^\/?#]+))?([^?#]*)?(\?([^#]*))?(#(.*))?$/i, 26 | docURI, 27 | 28 | parseURI = function (u) { 29 | var m = u.match(uriRegex); 30 | if (m === null) { 31 | throw "Malformed URI: " + u; 32 | } 33 | return { 34 | scheme: m[1] ? m[2].toLowerCase() : undefined, 35 | authority: m[3] ? m[4] : undefined, 36 | path: m[5] || '', 37 | query: m[6] ? m[7] : undefined, 38 | fragment: m[8] ? m[9] : undefined 39 | }; 40 | }, 41 | 42 | removeDotSegments = function (u) { 43 | var r = '', m = []; 44 | if (/\./.test(u)) { 45 | while (u !== undefined && u !== '') { 46 | if (u === '.' 
// Merges a relative-path reference `r` with the path of base URI `b`,
// following RFC 3986 section 5.2.3:
//  - base has a defined authority and an empty path -> "/" + r
//  - otherwise -> base path up to and including its last "/", followed by r
// parseURI leaves `authority` undefined when the URI has none, so "defined"
// is tested against undefined. The previous test against '' treated a
// missing authority as defined and an empty one ("scheme://") as missing.
merge = function (b, r) {
    // A base without a path (e.g. the {} used for absolute URIs) is treated
    // as having an empty one rather than dereferencing undefined.
    var basePath = b.path || '';
    if (b.authority !== undefined && basePath === '') {
        return '/' + r;
    }
    return basePath.replace(/[^\/]+$/, '') + r;
};
/**
 * Creates a new jQuery.uri object. This should be invoked as a method rather than constructed using new.
 * Results are memoised in `mem` under their absolute string form, so asking
 * for the same absolute URI twice returns the same object.
 * @class Represents a URI
 * @param {String} [relative='']
 * @param {String|jQuery.uri} [base] Defaults to the base URI of the page
 * @returns {jQuery.uri} The new jQuery.uri object.
 * @example uri = jQuery.uri('/my/file.html');
 */
$.uri = function (relative, base) {
    var uri, key,
        hasOwn = Object.prototype.hasOwnProperty;
    relative = relative || '';
    // `mem` is a plain object, so only own properties are cache entries.
    // Without this check a relative reference such as "constructor" or
    // "toString" returned the inherited Object.prototype member instead of
    // a URI.
    if (hasOwn.call(mem, relative) && mem[relative]) {
        return mem[relative];
    }
    base = base || $.uri.base();
    if (typeof base === 'string') {
        base = $.uri.absolute(base);
    }
    uri = new $.uri.fn.init(relative, base);
    // Same key the implicit string conversion in mem[uri] produced.
    key = uri.toString();
    if (hasOwn.call(mem, key) && mem[key]) {
        return mem[key];
    }
    mem[key] = uri;
    return uri;
};
function (relative) { 166 | return $.uri(relative, this); 167 | }, 168 | 169 | /** 170 | * Creates a relative URI giving the path from this URI to the absolute URI passed as a parameter 171 | * @param {String|jQuery.uri} absolute 172 | * @returns String 173 | */ 174 | relative: function (absolute) { 175 | var aPath, bPath, i = 0, j, resultPath = [], result = ''; 176 | if (typeof absolute === 'string') { 177 | absolute = $.uri(absolute, {}); 178 | } 179 | if (absolute.scheme !== this.scheme || 180 | absolute.authority !== this.authority) { 181 | return absolute.toString(); 182 | } 183 | if (absolute.path !== this.path) { 184 | aPath = absolute.path.split('/'); 185 | bPath = this.path.split('/'); 186 | if (aPath[1] !== bPath[1]) { 187 | result = absolute.path; 188 | } else { 189 | while (aPath[i] === bPath[i]) { 190 | i += 1; 191 | } 192 | j = i; 193 | for (; i < bPath.length - 1; i += 1) { 194 | resultPath.push('..'); 195 | } 196 | for (; j < aPath.length; j += 1) { 197 | resultPath.push(aPath[j]); 198 | } 199 | result = resultPath.join('/'); 200 | } 201 | result = absolute.query === undefined ? result : result + '?' + absolute.query; 202 | result = absolute.fragment === undefined ? result : result + '#' + absolute.fragment; 203 | return result; 204 | } 205 | if (absolute.query !== undefined && absolute.query !== this.query) { 206 | return '?' + absolute.query + (absolute.fragment === undefined ? '' : '#' + absolute.fragment); 207 | } 208 | if (absolute.fragment !== undefined && absolute.fragment !== this.fragment) { 209 | return '#' + absolute.fragment; 210 | } 211 | return ''; 212 | }, 213 | 214 | /** 215 | * Returns the URI as an absolute string 216 | * @returns String 217 | */ 218 | toString: function () { 219 | var result = ''; 220 | if (this._string) { 221 | return this._string; 222 | } else { 223 | result = this.scheme === undefined ? result : (result + this.scheme + ':'); 224 | result = this.authority === undefined ? 
result : (result + '//' + this.authority); 225 | result = result + this.path; 226 | result = this.query === undefined ? result : (result + '?' + this.query); 227 | result = this.fragment === undefined ? result : (result + '#' + this.fragment); 228 | this._string = result; 229 | return result; 230 | } 231 | } 232 | 233 | }; 234 | 235 | $.uri.fn.init.prototype = $.uri.fn; 236 | 237 | /** 238 | * Creates a {@link jQuery.uri} from a known-to-be-absolute URI 239 | * @param {String} 240 | * @returns {jQuery.uri} 241 | */ 242 | $.uri.absolute = function (uri) { 243 | return $.uri(uri, {}); 244 | }; 245 | 246 | /** 247 | * Creates a {@link jQuery.uri} from a relative URI and an optional base URI 248 | * @returns {jQuery.uri} 249 | * @see jQuery.uri 250 | */ 251 | $.uri.resolve = function (relative, base) { 252 | return $.uri(relative, base); 253 | }; 254 | 255 | /** 256 | * Creates a string giving the relative path from a base URI to an absolute URI 257 | * @param {String} absolute 258 | * @param {String} base 259 | * @returns {String} 260 | */ 261 | $.uri.relative = function (absolute, base) { 262 | return $.uri(base, {}).relative(absolute); 263 | }; 264 | 265 | /** 266 | * Returns the base URI of the page 267 | * @returns {jQuery.uri} 268 | */ 269 | $.uri.base = function () { 270 | return $(document).base(); 271 | }; 272 | 273 | /** 274 | * Returns the base URI in scope for the first selected element 275 | * @methodOf jQuery# 276 | * @name jQuery#base 277 | * @returns {jQuery.uri} 278 | * @example baseURI = $('img').base(); 279 | */ 280 | $.fn.base = function () { 281 | var base = $(this).parents().andSelf().find('base').attr('href'), 282 | doc = $(this)[0].ownerDocument || document, 283 | docURI = $.uri.absolute(doc.location === null ? document.location.href : doc.location.href); 284 | return base === undefined ? 
docURI : $.uri(base, docURI); 285 | }; 286 | 287 | })(jQuery); 288 | -------------------------------------------------------------------------------- /query.js: -------------------------------------------------------------------------------- 1 | // get the object we'll use for persistent storage 2 | var storage = getStorage(); 3 | 4 | initStorage(storage); 5 | 6 | function query(storage, sentence) { 7 | // check for sentence word by word in list (hashtable) 8 | var words = sentence.split(" "); 9 | var lookup = ""; 10 | var prepend = ""; 11 | // seems like persistence is the key step here ... 12 | // dialogue history to a gist 13 | // can we check changes to the knowledge base directly into github ... 14 | // then we would get versioning .... 15 | // would be nice to have local stub of that to allow for testing and working when off the grid ... 16 | // if we stick stuff in a json file in github what happens to our query speed? worry about optimizing that later ... 17 | // gradually load more and more of that data into the memory of the browser in the background as the human is typing so that 18 | // we can do immediate in memory search on the JSON data? 19 | 20 | // things to do here 21 | // 1. grab first response from google on query on what user said 22 | // --- need way to grab URL of request from Google 23 | // 2. process assertions, e.g. 'there is a course called Mobile Design & Programming' 24 | // --- that relies on some form of regex and also persistence 25 | // 3. process queries, e.g. what is the start date of Mobile Design & Programming 26 | // --- want to be able to check for bigrams/trigrams having removed stopwords 27 | // 4. want to log transcript to some persistence store 28 | // 5. could add eliza/twss code? 29 | // 6. could add joke of the day code - looks like we can't due to cross-server scripting constraint 30 | // 7. chuck norris code might work 31 | 32 | 33 | // so perhaps we could just create a json structure to reflect the assertion ... 
34 | // I guess ultimately we really want that flexible parse structure to handle 35 | // a) Mobile is a new course 36 | // b) I heard that there's a new course called Mobile 37 | // c) Have you signed up for that new Mobile course? 38 | var match = matchEntityAssertionRegex(sentence); 39 | // want to check is match is undefined or not ... 40 | var response = "OK"; 41 | 42 | if(match !== null){ 43 | response = match.name + ' is a ' +match.object; 44 | 45 | /* bit ugly using the name as identifier, might be better 46 | to use something like a GUID to represent new entities 47 | and name them using a separate foaf:name triple. However, then 48 | we'd need a way to recognise existing entities. 49 | */ 50 | storage.storeEntity(match.object,match.name) 51 | 52 | // _:John a "person" ; foaf:name "John" 53 | // _:John _:favourite_colour "blue" ; foaf:name "blue" 54 | // _:favourite_color type_of_relation "between people" ???? 55 | 56 | // "John" a "person" ??? 57 | // foaf:name 58 | // foaf:type ? 59 | 60 | } 61 | else { 62 | var properties_match = matchPropertiesRegex(sentence); 63 | 64 | if( properties_match !== null){ 65 | storage.storeProperty(properties_match.object, properties_match.relation, properties_match.name); 66 | return "The " + properties_match.relation +" for " + properties_match.object + " is " + properties_match.name; 67 | } 68 | else{ 69 | response = handleQuestion(storage,sentence); 70 | } 71 | 72 | } 73 | 74 | /* 75 | $.getJSON("http://www.joke-db.com/widgets/src/wp/clean/monkey/123?callback=?",null 76 | { 77 | url: 78 | }).done(function ( data ) { 79 | console.log("test"); 80 | if( console && console.log ) { 81 | console.log("Sample of data:", data); 82 | } 83 | }); */ 84 | // $.icndb.getRandomJoke(12) // this was for chuck norris 85 | return response; 86 | } 87 | 88 | function matchEntityAssertionRegex(sentence) { 89 | // Using named capture and flag x (free-spacing and line comments) 90 | var assert = XRegExp('(? (T|t)here(\\si|\')s\\sa ) \\s? 
# assert \n' + 91 | '(? .* ) \\s # object \n' + 92 | '(? called ) \\s? # called \n' + 93 | '(? .* ) # name ', 'x'); 94 | return XRegExp.exec(sentence, assert); 95 | } 96 | 97 | function matchPropertiesRegex(sentence){ 98 | //Unreal Engine has a website http://unrealengine.com ---> _:Unreal_Engine has_a_website http://unrealengine.com 99 | //Unreal Engine's website is http://unrealengine.com 100 | var assert = XRegExp('(? .+ ) \\s # object \n' + 101 | '(? has\\sa ) \\s # has_a \n' + 102 | '(? .+ ) \\s # relation \n' + 103 | '(? .+ ) # name ', 'x'); 104 | return XRegExp.exec(sentence, assert); 105 | } 106 | 107 | // TODO add this to String itself e.g. String.prototype.removeStopWords = function() 108 | function removePunctuation(sentence){ 109 | return sentence.replace(/[^\w\s]/g,''); 110 | } 111 | 112 | function getPossibleEntities(sentence){ 113 | sentence = removePunctuation(sentence); // could get this function in String itself 114 | var words = sentence.removeStopWords().split(' '); 115 | var bigrams = natural.NGrams.bigrams(words); 116 | for(var i in bigrams){ // e.g. "Unreal Engine" 117 | words.push(bigrams[i].join('_')); // e.g. "Unreal_Engine" 118 | } 119 | return words; 120 | } 121 | 122 | function handleQuestion(storage, sentence) { 123 | // now this really needs refactoring!!! 124 | var response = 'why?'; 125 | var words = getPossibleEntities(sentence); 126 | var type = ''; 127 | var result = {}; 128 | // TODO return all other relations for that thing, e.g. website etc. 
129 | for(var i in words){ 130 | // _:John a ?type 131 | result = storage.queryEntity(words[i]); 132 | if(result !== undefined){ 133 | var obj = words[i].replace('_',' ') 134 | // to query a specific relation we have to look for all possible relations 135 | // and see if any match any of the other words in the sentence 136 | //storage.queryProperty(name,relation); 137 | var allProps = storage.queryAllProperties(obj); 138 | response = "I know that "+obj+" is a " + result.type; 139 | for (var nr in allProps) { 140 | var relation = allProps[nr].relation; 141 | var name = allProps[nr].name; 142 | if ((result.type != name) && (relation.indexOf("foaf") == -1)) { 143 | debugger 144 | if(words.some(function(x){return x === relation})){ 145 | response = "The " + relation + " for " + obj + " is " + name; 146 | break; 147 | }else{ 148 | response += " and " + relation + " for " + obj + " is " + name; 149 | } 150 | } 151 | } 152 | break; 153 | } 154 | } 155 | return response; 156 | } 157 | 158 | 159 | function updateHistory(who, sentence) { 160 | var prefix = ''; 161 | if (who == 'bot') { 162 | prefix = 'Bot: '; 163 | } else if (who == 'human') { 164 | prefix = 'You: '; 165 | } 166 | 167 | var fmt = ''+prefix+sentence+'
'; 168 | $("div#history").append(fmt); 169 | } 170 | 171 | function showResponse(who, what) { 172 | storage.addToTranscript(who, what); 173 | updateHistory(who, what); 174 | } 175 | 176 | /* 177 | * handle commands to the bot that should not appear in the transcipt or 178 | * affect the KB. 179 | */ 180 | function handleCommand(sentence) { 181 | if (sentence == 'show kb') { 182 | alert(storage.getKnowledgeBaseAsText()); 183 | } else if (sentence == 'show transcript') { 184 | alert(storage.getTranscript()); 185 | } else { 186 | return false; // was not a command 187 | } 188 | } 189 | 190 | function handleChat(sentence) { 191 | if (!handleCommand(sentence)) { 192 | showResponse('human', sentence + "
"); 193 | showResponse('bot', query(storage, sentence) + "
"); 194 | storage.save(); 195 | } 196 | 197 | return false; 198 | } 199 | 200 | 201 | // not functional yet - just Sam playing around with github accesss 202 | // TODO move this to storage.js 203 | function storageGithub(){ 204 | var github = new Github({ 205 | token: "OAUTH_TOKEN", 206 | auth: "oauth" 207 | }); 208 | var repo = github.getRepo('tansaku','faqbot'); 209 | repo.read('master', 'initial_kb.json', function(err, data) {}); 210 | repo = repo + "new data"; 211 | repo.write('master', 'initial_kb.json', repo, 'new data', function(err) {}); 212 | } 213 | 214 | 215 | // If storage is empty (this is the first time we are called) then 216 | // add some basic knowledge 217 | function initStorage(storage) { 218 | if (storage.isEmpty()) { 219 | // load the initial knowledge base from a text file in turtle format 220 | $.get('initial_kb.txt', function(turtle) { 221 | storage.loadKnowledgeBaseFromString(turtle); 222 | //alert("from local file: " + turtle); 223 | storage.save(); 224 | }, 'text'); 225 | } else { 226 | storage.load(); 227 | } 228 | } 229 | 230 | function showTranscript(storage) { 231 | var transcript = storage.getTranscript(); 232 | if (transcript.length > 0) { 233 | for (var i=0; iJeni Tennison 13 | * @copyright (c) 2008,2009 Jeni Tennison 14 | * @license MIT license (MIT-LICENSE.txt) 15 | * @version 1.0 16 | * @requires jquery.uri.js 17 | */ 18 | 19 | /*global jQuery */ 20 | (function ($) { 21 | 22 | var 23 | xmlNs = 'http://www.w3.org/XML/1998/namespace', 24 | xmlnsNs = 'http://www.w3.org/2000/xmlns/', 25 | 26 | xmlnsRegex = /\sxmlns(?::([^ =]+))?\s*=\s*(?:"([^"]*)"|'([^']*)')/g, 27 | 28 | ncNameChar = '[-A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u10000-\uEFFFF\.0-9\u00B7\u0300-\u036F\u203F-\u2040]', 29 | ncNameStartChar = 
'[\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48
-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3\u4E00-\u9FA5\u3007\u3021-\u3029_]', 30 | ncNameRegex = new RegExp('^' + ncNameStartChar + ncNameChar + '*$'); 31 | 32 | 33 | /** 34 | * Returns the namespaces declared in the scope of the first selected element, or 35 | * adds a namespace declaration to all selected elements. Pass in no parameters 36 | * to return all namespaces bindings on the first selected element. If only 37 | * the prefix parameter is specified, this method will return the namespace 38 | * URI that is bound to the specified prefix on the first element in the selection 39 | * If the prefix and uri parameters are both specified, this method will 40 | * add the binding of the specified prefix and namespace URI to all elements 41 | * in the selection. 42 | * @methodOf jQuery# 43 | * @name jQuery#xmlns 44 | * @param {String} [prefix] Restricts the namespaces returned to only the namespace with the specified namespace prefix. 45 | * @param {String|jQuery.uri} [uri] Adds a namespace declaration to the selected elements that maps the specified prefix to the specified namespace. 46 | * @param {Object} [inherited] A map of inherited namespace bindings. 
47 | * @returns {Object|jQuery.uri|jQuery} 48 | * @example 49 | * // Retrieve all of the namespace bindings on the HTML document element 50 | * var nsMap = $('html').xmlns(); 51 | * @example 52 | * // Retrieve the namespace URI mapped to the 'dc' prefix on the HTML document element 53 | * var dcNamespace = $('html').xmlns('dc'); 54 | * @example 55 | * // Create a namespace declaration that binds the 'dc' prefix to the URI 'http://purl.org/dc/elements/1.1/' 56 | * $('html').xmlns('dc', 'http://purl.org/dc/elements/1.1/'); 57 | */ 58 | $.fn.xmlns = function (prefix, uri, inherited) { 59 | var 60 | elem = this.eq(0), 61 | ns = elem.data('xmlns'), 62 | e = elem[0], a, p, i, 63 | decl = prefix ? 'xmlns:' + prefix : 'xmlns', 64 | value, 65 | tag, found = false; 66 | if (uri === undefined) { 67 | if (prefix === undefined) { // get the in-scope declarations on the first element 68 | if (!ns) { 69 | ns = { 70 | // xml: $.uri(xmlNs) 71 | }; 72 | if (e.attributes && e.attributes.getNamedItemNS) { 73 | for (i = 0; i < e.attributes.length; i += 1) { 74 | a = e.attributes[i]; 75 | if (/^xmlns(:(.+))?$/.test(a.nodeName)) { 76 | prefix = /^xmlns(:(.+))?$/.exec(a.nodeName)[2] || ''; 77 | value = a.nodeValue; 78 | if (prefix === '' || (value !== '' && value !== xmlNs && value !== xmlnsNs && ncNameRegex.test(prefix) && prefix !== 'xml' && prefix !== 'xmlns')) { 79 | ns[prefix] = $.uri(a.nodeValue); 80 | found = true; 81 | } 82 | } 83 | } 84 | } else { 85 | tag = /<[^>]+>/.exec(e.outerHTML); 86 | a = xmlnsRegex.exec(tag); 87 | while (a !== null) { 88 | prefix = a[1] || ''; 89 | value = a[2] || a[3]; 90 | if (prefix === '' || (value !== '' && value !== xmlNs && value !== xmlnsNs && ncNameRegex.test(prefix) && prefix !== 'xml' && prefix !== 'xmlns')) { 91 | ns[prefix] = $.uri(a[2] || a[3]); 92 | found = true; 93 | } 94 | a = xmlnsRegex.exec(tag); 95 | } 96 | xmlnsRegex.lastIndex = 0; 97 | } 98 | inherited = inherited || (e.parentNode.nodeType === 1 ? 
elem.parent().xmlns() : {}); 99 | ns = found ? $.extend({}, inherited, ns) : inherited; 100 | elem.data('xmlns', ns); 101 | } 102 | return ns; 103 | } else if (typeof prefix === 'object') { // set the prefix mappings defined in the object 104 | for (p in prefix) { 105 | if (typeof prefix[p] === 'string' && ncNameRegex.test(p)) { 106 | this.xmlns(p, prefix[p]); 107 | } 108 | } 109 | this.find('*').andSelf().removeData('xmlns'); 110 | return this; 111 | } else { // get the in-scope declaration associated with this prefix on the first element 112 | if (!ns) { 113 | ns = elem.xmlns(); 114 | } 115 | return ns[prefix]; 116 | } 117 | } else { // set 118 | this.find('*').andSelf().removeData('xmlns'); 119 | return this.attr(decl, uri); 120 | } 121 | }; 122 | 123 | /** 124 | * Removes one or more XML namespace bindings from the selected elements. 125 | * @methodOf jQuery# 126 | * @name jQuery#removeXmlns 127 | * @param {String|Object|String[]} prefix The prefix(es) of the XML namespace bindings that are to be removed from the selected elements. 128 | * @returns {jQuery} The original jQuery object. 
129 | * @example 130 | * // Remove the foaf namespace declaration from the body element: 131 | * $('body').removeXmlns('foaf'); 132 | * @example 133 | * // Remove the foo and bar namespace declarations from all h2 elements 134 | * $('h2').removeXmlns(['foo', 'bar']); 135 | * @example 136 | * // Remove the foo and bar namespace declarations from all h2 elements 137 | * var namespaces = { foo : 'http://www.example.org/foo', bar : 'http://www.example.org/bar' }; 138 | * $('h2').removeXmlns(namespaces); 139 | */ 140 | $.fn.removeXmlns = function (prefix) { 141 | var decl, p, i; 142 | if (typeof prefix === 'object') { 143 | if (prefix.length === undefined) { // assume an object representing namespaces 144 | for (p in prefix) { 145 | if (typeof prefix[p] === 'string') { 146 | this.removeXmlns(p); 147 | } 148 | } 149 | } else { // it's an array 150 | for (i = 0; i < prefix.length; i += 1) { 151 | this.removeXmlns(prefix[i]); 152 | } 153 | } 154 | } else { 155 | decl = prefix ? 'xmlns:' + prefix : 'xmlns'; 156 | this.removeAttr(decl); 157 | } 158 | this.find('*').andSelf().removeData('xmlns'); 159 | return this; 160 | }; 161 | 162 | $.fn.qname = function (name) { 163 | var m, prefix, namespace; 164 | if (name === undefined) { 165 | if (this[0].outerHTML === undefined) { 166 | name = this[0].nodeName.toLowerCase(); 167 | } else { 168 | name = /<([^ >]+)/.exec(this[0].outerHTML)[1].toLowerCase(); 169 | } 170 | } 171 | if (name === '?xml:namespace') { 172 | // there's a prefix on the name, but we can't get at it 173 | throw "XMLinHTML: Unable to get the prefix to resolve the name of this element"; 174 | } 175 | m = /^(([^:]+):)?([^:]+)$/.exec(name); 176 | prefix = m[2] || ''; 177 | namespace = this.xmlns(prefix); 178 | if (namespace === undefined && prefix !== '') { 179 | throw "MalformedQName: The prefix " + prefix + " is not declared"; 180 | } 181 | return { 182 | namespace: namespace, 183 | localPart: m[3], 184 | prefix: prefix, 185 | name: name 186 | }; 187 | }; 188 | 189 
| })(jQuery); 190 | -------------------------------------------------------------------------------- /h1-p/eval_gene_tagger.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | __author__="Daniel Bauer " 4 | __date__ ="$Sep 29, 2011" 5 | 6 | import sys 7 | 8 | 9 | """ 10 | Evaluate gene tagger output by comparing it to a gold standard file. 11 | 12 | Running the script on your tagger output like this 13 | 14 | python eval_gene_tagger.py gene_dev.key your_tagger_output.dat 15 | 16 | will generate a table of results like this: 17 | 18 | Found 14071 GENES. Expected 5942 GENES; Correct: 3120. 19 | 20 | precision recall F1-Score 21 | GENE: 0.433367 0.231270 0.301593 22 | 23 | Adopted from original named entity evaluation. 24 | 25 | """ 26 | 27 | def corpus_iterator(corpus_file, with_logprob = False): 28 | """ 29 | Get an iterator object over the corpus file. The elements of the 30 | iterator contain (word, ne_tag) tuples. Blank lines, indicating 31 | sentence boundaries return (None, None). 32 | """ 33 | l = corpus_file.readline() 34 | tagfield = with_logprob and -2 or -1 35 | 36 | try: 37 | while l: 38 | line = l.strip() 39 | if line: # Nonempty line 40 | # Extract information from line. 41 | # Each line has the format 42 | # word ne_tag [log_prob] 43 | fields = line.split(" ") 44 | ne_tag = fields[tagfield] 45 | word = " ".join(fields[:tagfield]) 46 | yield word, ne_tag 47 | else: # Empty line 48 | yield (None, None) 49 | l = corpus_file.readline() 50 | except IndexError: 51 | sys.stderr.write("Could not read line: \n") 52 | sys.stderr.write("\n%s" % line) 53 | if with_logprob: 54 | sys.stderr.write("Did you forget to output log probabilities in the prediction file?\n") 55 | sys.exit(1) 56 | 57 | 58 | class NeTypeCounts(object): 59 | """ 60 | Stores true/false positive/negative counts for each NE type. 
61 | """ 62 | 63 | def __init__(self): 64 | self.tp = 0 65 | self.fp = 0 66 | self.tn = 0 67 | self.fn = 0 68 | 69 | def get_precision(self): 70 | return self.tp / float(self.tp + self.fp) 71 | 72 | def get_recall(self): 73 | return self.tp / float(self.tp + self.fn) 74 | 75 | def get_accuracy(self): 76 | return (self.tp + self.tn) / float(self.tp + self.tn + self.fp + self.fn) 77 | 78 | 79 | class Evaluator(object): 80 | """ 81 | Stores global true/false positive/negative counts. 82 | """ 83 | 84 | 85 | ne_classes = ["GENE"] 86 | 87 | def __init__(self): 88 | self.tp = 0 89 | self.tn = 0 90 | self.fp = 0 91 | self.fn = 0 92 | 93 | # Initialize an object that counts true/false positives/negatives 94 | # for each NE class 95 | self.class_counts = {} 96 | for c in self.ne_classes: 97 | self.class_counts[c] = NeTypeCounts() 98 | 99 | def compare(self, gold_standard, prediction): 100 | """ 101 | Compare the prediction against a gold standard. Both objects must be 102 | generator or iterator objects that return a (word, ne_tag) tuple at a 103 | time. 
104 | """ 105 | 106 | # Define a couple of tags indicating the status of each stream 107 | curr_pred_type = None # prediction stream was previously in a named entity 108 | curr_pred_start = None # a new prediction starts at the current token 109 | curr_gs_type = None # prediction stream was previously in a named entity 110 | curr_gs_start = None # a new prediction starts at the current token 111 | 112 | total = 0 113 | for gs_word, gs_tag in gold_standard: # Move through the gold standard stream 114 | pred_word, pred_tag = prediction.next() # Get the corresponding item from the prediction stream 115 | 116 | # Make sure words in both files match up 117 | if gs_word != pred_word: 118 | sys.stderr.write("Could not align gold standard and predictions in line %i.\n" % (total+1)) 119 | sys.stderr.write("Gold standard: %s Prediction file: %s\n" % (gs_word, pred_word)) 120 | sys.exit(1) 121 | 122 | # Split off the I and B tags 123 | gs_type = gs_tag==None and "O" or gs_tag.split("-")[-1] 124 | pred_type = pred_tag==None and "O" or pred_tag.split("-")[-1] 125 | 126 | # Check if a named entity ends here in either stream. 127 | # This is the case if we are currently in an entity and either 128 | # - end of sentence 129 | # - current word is marked O 130 | # - new entity starts (B - or I with different NE type) 131 | pred_ends = curr_pred_type!=None and ((pred_tag==None or pred_tag[0] in "OB") or (curr_pred_type!=pred_type and pred_tag[0]=="I")) 132 | gs_ends = curr_gs_type!=None and ((gs_tag==None or gs_tag[0] in "OB") or (curr_gs_type!=gs_type and gs_tag[0]=="I")) 133 | 134 | 135 | # Check if a named entity starts here in either stream. 
136 | # This is tha case if this is not the end of a sentence and 137 | # - This is not the end of a sentence 138 | # - New entity starts (B, I after O or at begining of sentence or 139 | # I with different NE type) 140 | if pred_word!=None: 141 | pred_start = (pred_tag!=None and pred_tag[0] == "B") or (curr_pred_type==None and pred_tag[0]=="I") or \ 142 | (curr_pred_type!=None and curr_pred_type!=pred_type and pred_tag.startswith("I")) 143 | gs_starts = (gs_tag!=None and gs_tag[0] == "B") or (curr_gs_type==None and gs_tag[0]=="I") or \ 144 | (curr_gs_type!=None and curr_gs_type!=gs_type and gs_tag.startswith("I")) 145 | else: 146 | pred_start = False 147 | gs_starts = False 148 | 149 | #For debugging: 150 | #print pred_word, gs_tag, pred_tag, pred_ends, gs_ends, pred_start, gs_starts 151 | 152 | 153 | # Now try to match up named entities that end here 154 | 155 | if gs_ends and pred_ends: # GS and prediction contain a named entity that ends in the same place 156 | 157 | #If both named entities start at the same place and are of the same type 158 | if curr_gs_start == curr_pred_start and curr_gs_type == curr_pred_type: 159 | # Count true positives 160 | self.tp += 1 161 | self.class_counts[curr_pred_type].tp += 1 162 | else: #span matches, but label doesn't match: count both a true positive and a false negative 163 | self.fp += 1 164 | self.fn += 1 165 | self.class_counts[curr_pred_type].fp += 1 166 | self.class_counts[curr_gs_type].fn += 1 167 | elif gs_ends: #Didn't find the named entity in the gold standard, count false negative 168 | self.fn += 1 169 | self.class_counts[curr_gs_type].fn += 1 170 | elif pred_ends: #Named entity in the prediction doesn't match one int he gold_standard, count false positive 171 | self.fp += 1 172 | self.class_counts[curr_pred_type].fp += 1 173 | elif curr_pred_type==None and curr_pred_type==None: #matching O tag or end of sentence, count true negative 174 | self.tn += 1 175 | for c in self.ne_classes: 176 | self.class_counts[c].tn 
+= 1 177 | 178 | # Remember that we are no longer in a named entity 179 | if gs_ends: 180 | curr_gs_type = None 181 | if pred_ends: 182 | curr_pred_type = None 183 | 184 | # If a named entity starts here, remember it's type and this position 185 | if gs_starts: 186 | curr_gs_start = total 187 | curr_gs_type = gs_type 188 | if pred_start: 189 | curr_pred_start = total 190 | curr_pred_type = pred_type 191 | total += 1 192 | 193 | def print_scores(self): 194 | """ 195 | Output a table with accuracy, precision, recall and F1 score. 196 | """ 197 | 198 | print "Found %i GENEs. Expected %i GENEs; Correct: %i.\n" % (self.tp + self.fp, self.tp + self.fn, self.tp) 199 | 200 | 201 | if self.tp + self.tn + self.fp + self.fn == 0: # There was nothing to do. 202 | acc = 1 203 | else: 204 | acc = (self.tp + self.tn) / float(self.tp + self.tn + self.fp + self.fn) 205 | 206 | if self.tp+self.fp == 0: # Prediction didn't annotate any NEs 207 | prec = 1 208 | 209 | else: 210 | prec = self.tp / float(self.tp + self.fp) 211 | 212 | 213 | if self.tp+self.fn == 0: # Prediction marked everything as a NE of the wrong type. 
214 | rec = 1 215 | else: 216 | rec = self.tp / float(self.tp + self.fn) 217 | 218 | print "\t precision \trecall \t\tF1-Score" 219 | fscore = (2*prec*rec)/(prec+rec) 220 | #print "Total:\t %f\t%f\t%f" % (prec, rec, fscore) 221 | for c in self.ne_classes: 222 | c_tp = self.class_counts[c].tp 223 | c_tn = self.class_counts[c].tn 224 | c_fp = self.class_counts[c].fp 225 | c_fn = self.class_counts[c].fn 226 | #print c 227 | #print c_tp 228 | #print c_tn 229 | #print c_fp 230 | #print c_fn 231 | if (c_tp + c_tn + c_fp + c_fn) == 0: 232 | c_acc = 1 233 | else: 234 | c_acc = (c_tp + c_tn) / float(c_tp + c_tn + c_fp + c_fn) 235 | 236 | if (c_tp + c_fn) == 0: 237 | sys.stderr.write("Warning: no instances for entity type %s in gold standard.\n" % c) 238 | c_rec = 1 239 | else: 240 | c_rec = c_tp / float(c_tp + c_fn) 241 | if (c_tp + c_fp) == 0: 242 | sys.stderr.write("Warning: prediction file does not contain any instances of entity type %s.\n" % c) 243 | c_prec =1 244 | else: 245 | c_prec = c_tp / float(c_tp + c_fp) 246 | 247 | if c_prec + c_rec == 0: 248 | fscore = 0 249 | else: 250 | fscore = (2*c_prec * c_rec)/(c_prec + c_rec) 251 | print "%s:\t %f\t%f\t%f" % (c, c_prec, c_rec, fscore) 252 | 253 | 254 | def usage(): 255 | sys.stderr.write(""" 256 | Usage: python eval_gene_tagger.py [key_file] [prediction_file] 257 | Evaluate the gene-tagger output in prediction_file against 258 | the gold standard in key_file. 
Output accuracy, precision, 259 | recall and F1-Score.\n""") 260 | 261 | if __name__ == "__main__": 262 | 263 | if len(sys.argv)!=3: 264 | usage() 265 | sys.exit(1) 266 | gs_iterator = corpus_iterator(file(sys.argv[1])) 267 | pred_iterator = corpus_iterator(file(sys.argv[2]), with_logprob = False) 268 | evaluator = Evaluator() 269 | evaluator.compare(gs_iterator, pred_iterator) 270 | evaluator.print_scores() 271 | -------------------------------------------------------------------------------- /thirdparty/jquery.curie.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery CURIE @VERSION 3 | * 4 | * Copyright (c) 2008,2009 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | * Depends: 8 | * jquery.uri.js 9 | * jquery.xmlns.js 10 | */ 11 | 12 | /** 13 | * @fileOverview jQuery CURIE handling 14 | * @author Jeni Tennison 15 | * @copyright (c) 2008,2009 Jeni Tennison 16 | * @license MIT license (MIT-LICENSE.txt) 17 | * @version 1.0 18 | * @requires jquery.uri.js 19 | * @requires jquery.xmlns.js 20 | */ 21 | (function ($) { 22 | 23 | /** 24 | * Creates a {@link jQuery.uri} object by parsing a CURIE. 25 | * @methodOf jQuery 26 | * @param {String} curie The CURIE to be parsed 27 | * @param {String} uri The URI string to be converted to a CURIE. 28 | * @param {Object} [options] CURIE parsing options 29 | * @param {string} [options.reservedNamespace='http://www.w3.org/1999/xhtml/vocab#'] The namespace to apply to a CURIE that has no prefix and either starts with a colon or is in the list of reserved local names 30 | * @param {string} [options.defaultNamespace] The namespace to apply to a CURIE with no prefix which is not mapped to the reserved namespace by the rules given above. 31 | * @param {Object} [options.namespaces] A map of namespace bindings used to map CURIE prefixes to URIs. 
32 | * @param {string[]} [options.reserved=['alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright', 'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next', 'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up']] A list of local names that will always be mapped to the URI specified by reservedNamespace. 33 | * @param {string} [options.charcase='lower'] Specifies whether the curie's case is altered before it's interpreted. Acceptable values are: 34 | *
35 | *
lower
Force the CURIE string to lower case.
36 | *
upper
Force the CURIE string to upper case.
37 | *
preserve
Preserve the original case of the CURIE. Note that this might not be possible if the CURIE has been taken from an HTML attribute value because of the case conversions performed automatically by browsers. For this reason, it's a good idea to avoid mixed-case CURIEs within RDFa.
38 | *
39 | * @returns {jQuery.uri} A new {@link jQuery.uri} object representing the full absolute URI specified by the CURIE. 40 | */ 41 | $.curie = function (curie, options) { 42 | var 43 | opts = $.extend({}, $.curie.defaults, options || {}), 44 | m = /^(([^:]*):)?(.+)$/.exec(curie), 45 | prefix = m[2], 46 | local = m[3], 47 | ns = opts.namespaces[prefix]; 48 | if (/^:.+/.test(curie)) { // This is the case of a CURIE like ":test" 49 | if (opts.reservedNamespace === undefined || opts.reservedNamespace === null) { 50 | throw "Malformed CURIE: No prefix and no default namespace for unprefixed CURIE " + curie; 51 | } else { 52 | ns = opts.reservedNamespace; 53 | } 54 | } else if (prefix) { 55 | if (ns === undefined) { 56 | throw "Malformed CURIE: No namespace binding for " + prefix + " in CURIE " + curie; 57 | } 58 | } else { 59 | if (opts.charcase === 'lower') { 60 | curie = curie.toLowerCase(); 61 | } else if (opts.charcase === 'upper') { 62 | curie = curie.toUpperCase(); 63 | } 64 | if (opts.reserved.length && $.inArray(curie, opts.reserved) >= 0) { 65 | ns = opts.reservedNamespace; 66 | local = curie; 67 | } else if (opts.defaultNamespace === undefined || opts.defaultNamespace === null) { 68 | // the default namespace is provided by the application; it's not clear whether 69 | // the default XML namespace should be used if there's a colon but no prefix 70 | throw "Malformed CURIE: No prefix and no default namespace for unprefixed CURIE " + curie; 71 | } else { 72 | ns = opts.defaultNamespace; 73 | } 74 | } 75 | return $.uri(ns + local); 76 | }; 77 | 78 | $.curie.defaults = { 79 | namespaces: {}, 80 | reserved: [], 81 | reservedNamespace: undefined, 82 | defaultNamespace: undefined, 83 | charcase: 'preserve' 84 | }; 85 | 86 | /** 87 | * Creates a {@link jQuery.uri} object by parsing a safe CURIE string (a CURIE 88 | * contained within square brackets). 
If the input safeCurie string does not 89 | * start with '[' and end with ']', the entire string content will be interpreted 90 | * as a URI string. 91 | * @methodOf jQuery 92 | * @param {String} safeCurie The safe CURIE string to be parsed. 93 | * @param {Object} [options] CURIE parsing options 94 | * @param {string} [options.reservedNamespace='http://www.w3.org/1999/xhtml/vocab#'] The namespace to apply to a CURIE that has no prefix and either starts with a colon or is in the list of reserved local names 95 | * @param {string} [options.defaultNamespace] The namespace to apply to a CURIE with no prefix which is not mapped to the reserved namespace by the rules given above. 96 | * @param {Object} [options.namespaces] A map of namespace bindings used to map CURIE prefixes to URIs. 97 | * @param {string[]} [options.reserved=['alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright', 98 | 'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next', 99 | 'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up']] 100 | A list of local names that will always be mapped to the URI specified by reservedNamespace. 101 | * @param {string} [options.charcase='lower'] Specifies whether the curie's case is altered before it's interpreted. Acceptable values are: 102 | *
103 | *
lower
Force the CURIE string to lower case.
104 | *
upper
Force the CURIE string to upper case.
105 | *
preserve
Preserve the original case of the CURIE. Note that this might not be possible if the CURIE has been taken from an HTML attribute value because of the case conversions performed automatically by browsers. For this reason, it's a good idea to avoid mixed-case CURIEs within RDFa.
106 | *
107 | * @returns {jQuery.uri} A new {@link jQuery.uri} object representing the full absolute URI specified by the CURIE. 108 | */ 109 | $.safeCurie = function (safeCurie, options) { 110 | var m = /^\[([^\]]+)\]$/.exec(safeCurie); 111 | return m ? $.curie(m[1], options) : $.uri(safeCurie); 112 | }; 113 | 114 | /** 115 | * Creates a CURIE string from a URI string. 116 | * @methodOf jQuery 117 | * @param {String} uri The URI string to be converted to a CURIE. 118 | * @param {Object} [options] CURIE parsing options 119 | * @param {string} [options.reservedNamespace='http://www.w3.org/1999/xhtml/vocab#'] 120 | * If the input URI starts with this value, the generated CURIE will 121 | * have no namespace prefix and will start with a colon character (:), 122 | * unless the local part of the CURIE is one of the reserved names specified 123 | * by the reservedNames option (see below), in which case the generated 124 | * CURIE will have no namespace prefix and will not start with a colon 125 | * character. 126 | * @param {string} [options.defaultNamespace] If the input URI starts with this value, the generated CURIE will have no namespace prefix and will not start with a colon. 127 | * @param {Object} [options.namespaces] A map of namespace bindings used to map CURIE prefixes to URIs. 128 | * @param {string[]} [options.reserved=['alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright', 129 | 'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next', 130 | 'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up']] 131 | A list of local names that will always be mapped to the URI specified by reservedNamespace. 132 | * @param {string} [options.charcase='lower'] Specifies the case normalisation done to the CURIE. Acceptable values are: 133 | *
134 | *
lower
Normalise the CURIE to lower case.
135 | *
upper
Normalise the CURIE to upper case.
136 | *
preserve
Preserve the original case of the CURIE. Note that this might not be possible if the CURIE has been taken from an HTML attribute value because of the case conversions performed automatically by browsers. For this reason, it's a good idea to avoid mixed-case CURIEs within RDFa.
137 | *
138 | * @returns {jQuery.uri} A new {@link jQuery.uri} object representing the full absolute URI specified by the CURIE. 139 | */ 140 | $.createCurie = function (uri, options) { 141 | var opts = $.extend({}, $.curie.defaults, options || {}), 142 | ns = opts.namespaces, 143 | curie; 144 | uri = $.uri(uri).toString(); 145 | if (opts.reservedNamespace !== undefined && 146 | uri.substring(0, opts.reservedNamespace.toString().length) === opts.reservedNamespace.toString()) { 147 | curie = uri.substring(opts.reservedNamespace.toString().length); 148 | if ($.inArray(curie, opts.reserved) === -1) { 149 | curie = ':' + curie; 150 | } 151 | } else { 152 | $.each(ns, function (prefix, namespace) { 153 | if (uri.substring(0, namespace.toString().length) === namespace.toString()) { 154 | curie = prefix + ':' + uri.substring(namespace.toString().length); 155 | return null; 156 | } 157 | }); 158 | } 159 | if (curie === undefined) { 160 | throw "No Namespace Binding: There's no appropriate namespace binding for generating a CURIE from " + uri; 161 | } else { 162 | return curie; 163 | } 164 | }; 165 | 166 | /** 167 | * Creates a {@link jQuery.uri} object by parsing the specified 168 | * CURIE string in the context of the namespaces defined by the 169 | * jQuery selection. 170 | * @methodOf jQuery# 171 | * @name jQuery#curie 172 | * @param {String} curie The CURIE string to be parsed 173 | * @param {Object} options The CURIE parsing options. 174 | * See {@link jQuery.curie} for details of the supported options. 175 | * The namespace declarations declared on the current jQuery 176 | * selection (and inherited from any ancestor elements) will automatically 177 | * be included in the options.namespaces property. 
178 | * @returns {jQuery.uri} 179 | * @see jQuery.curie 180 | */ 181 | $.fn.curie = function (curie, options) { 182 | var opts = $.extend({}, $.fn.curie.defaults, { namespaces: this.xmlns() }, options || {}); 183 | return $.curie(curie, opts); 184 | }; 185 | 186 | /** 187 | * Creates a {@link jQuery.uri} object by parsing the specified 188 | * safe CURIE string in the context of the namespaces defined by 189 | * the jQuery selection. 190 | * 191 | * @methodOf jQuery# 192 | * @name jQuery#safeCurie 193 | * @param {String} safeCurie The safe CURIE string to be parsed. See {@link jQuery.safeCurie} for details on how safe CURIE strings are processed. 194 | * @param {Object} options The CURIE parsing options. 195 | * See {@link jQuery.safeCurie} for details of the supported options. 196 | * The namespace declarations declared on the current jQuery 197 | * selection (and inherited from any ancestor elements) will automatically 198 | * be included in the options.namespaces property. 199 | * @returns {jQuery.uri} 200 | * @see jQuery.safeCurie 201 | */ 202 | $.fn.safeCurie = function (safeCurie, options) { 203 | var opts = $.extend({}, $.fn.curie.defaults, { namespaces: this.xmlns() }, options || {}); 204 | return $.safeCurie(safeCurie, opts); 205 | }; 206 | 207 | /** 208 | * Creates a CURIE string from a URI string using the namespace 209 | * bindings in the context of the current jQuery selection. 210 | * 211 | * @methodOf jQuery# 212 | * @name jQuery#createCurie 213 | * @param {String|jQuery.uri} uri The URI string to be converted to a CURIE 214 | * @param {Object} options the CURIE parsing options. 215 | * See {@link jQuery.createCurie} for details of the supported options. 216 | * The namespace declarations declared on the current jQuery 217 | * selection (and inherited from any ancestor elements) will automatically 218 | * be included in the options.namespaces property. 
219 | * @returns {String} 220 | * @see jQuery.createCurie 221 | */ 222 | $.fn.createCurie = function (uri, options) { 223 | var opts = $.extend({}, $.fn.curie.defaults, { namespaces: this.xmlns() }, options || {}); 224 | return $.createCurie(uri, opts); 225 | }; 226 | 227 | $.fn.curie.defaults = { 228 | reserved: [ 229 | 'alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright', 230 | 'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next', 231 | 'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up' 232 | ], 233 | reservedNamespace: 'http://www.w3.org/1999/xhtml/vocab#', 234 | defaultNamespace: undefined, 235 | charcase: 'lower' 236 | }; 237 | 238 | })(jQuery); 239 | -------------------------------------------------------------------------------- /thirdparty/jquery.datatype.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery CURIE @VERSION 3 | * 4 | * Copyright (c) 2008,2009 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | * Depends: 8 | * jquery.uri.js 9 | */ 10 | /** 11 | * @fileOverview XML Schema datatype handling 12 | * @author Jeni Tennison 13 | * @copyright (c) 2008,2009 Jeni Tennison 14 | * @license MIT license (MIT-LICENSE.txt) 15 | * @version 1.0 16 | * @requires jquery.uri.js 17 | */ 18 | 19 | (function ($) { 20 | 21 | var strip = function (value) { 22 | return value.replace(/[ \t\n\r]+/, ' ').replace(/^ +/, '').replace(/ +$/, ''); 23 | }; 24 | 25 | /** 26 | * Creates a new jQuery.typedValue object. This should be invoked as a method 27 | * rather than constructed using new. 
28 | * @class Represents a value with an XML Schema datatype 29 | * @param {String} value The string representation of the value 30 | * @param {String} datatype The XML Schema datatype URI 31 | * @returns {jQuery.typedValue} 32 | * @example intValue = jQuery.typedValue('42', 'http://www.w3.org/2001/XMLSchema#integer'); 33 | */ 34 | $.typedValue = function (value, datatype) { 35 | return $.typedValue.fn.init(value, datatype); 36 | }; 37 | 38 | $.typedValue.fn = $.typedValue.prototype = { 39 | /** 40 | * The string representation of the value 41 | * @memberOf jQuery.typedValue# 42 | */ 43 | representation: undefined, 44 | /** 45 | * The value as an object. The type of the object will 46 | * depend on the XML Schema datatype URI specified 47 | * in the constructor. The following table lists the mappings 48 | * currently supported: 49 | * 50 | * 51 | * 52 | * 53 | * 54 | * 55 | * 56 | * 57 | * 58 | * 59 | * 60 | * 61 | * 62 | * 63 | * 64 | * 65 | * 66 | * 67 | * 68 | * 69 | * 70 | * 71 | * 72 | * 73 | * 74 | * 75 | * 76 | * 77 | * 78 | * 79 | * 80 | * 81 | * 82 | * 83 | * 84 | * 85 | * 86 | * 87 | * 88 | * 89 | * 90 | * 91 | * 92 | * 93 | * 94 | * 95 | * 96 | * 97 | * 98 | * 99 | * 100 | * 101 | * 102 | * 103 | * 104 | * 105 | * 106 | * 107 | * 108 | * 109 | * 110 | *
XML Schema DatatypeValue type
http://www.w3.org/2001/XMLSchema#stringstring
http://www.w3.org/2001/XMLSchema#tokenstring
http://www.w3.org/2001/XMLSchema#NCNamestring
http://www.w3.org/2001/XMLSchema#booleanbool
http://www.w3.org/2001/XMLSchema#decimalstring
http://www.w3.org/2001/XMLSchema#integerint
http://www.w3.org/2001/XMLSchema#intint
http://www.w3.org/2001/XMLSchema#floatfloat
http://www.w3.org/2001/XMLSchema#doublefloat
http://www.w3.org/2001/XMLSchema#dateTimestring
http://www.w3.org/2001/XMLSchema#datestring
http://www.w3.org/2001/XMLSchema#gYearint
http://www.w3.org/2001/XMLSchema#gMonthDaystring
http://www.w3.org/2001/XMLSchema#anyURI{@link jQuery.uri}
111 | * @memberOf jQuery.typedValue# 112 | */ 113 | value: undefined, 114 | /** 115 | * The XML Schema datatype URI for the value's datatype 116 | * @memberOf jQuery.typedValue# 117 | */ 118 | datatype: undefined, 119 | 120 | init: function (value, datatype) { 121 | var d = $.typedValue.types[datatype]; 122 | if ($.typedValue.valid(value, datatype)) { 123 | this.representation = value; 124 | this.datatype = datatype; 125 | this.value = d === undefined ? strip(value) : d.value(d.strip ? strip(value) : value); 126 | return this; 127 | } else { 128 | throw { 129 | name: 'InvalidValue', 130 | message: value + ' is not a valid ' + datatype + ' value' 131 | }; 132 | } 133 | } 134 | }; 135 | 136 | $.typedValue.fn.init.prototype = $.typedValue.fn; 137 | 138 | /** 139 | * An object that holds the datatypes supported by the script. The properties of this object are the URIs of the datatypes, and each datatype has four properties: 140 | *
141 | *
strip
142 | *
A boolean value that indicates whether whitespace should be stripped from the value prior to testing against the regular expression or passing to the value function.
143 | *
regex
144 | *
A regular expression that valid values of the type must match.
145 | *
validate
146 | *
Optional. A function that performs further testing on the value.
147 | *
value
148 | *
A function that returns a Javascript object equivalent for the value.
149 | *
150 | * You can add to this object as necessary for your own datatypes, and {@link jQuery.typedValue} and {@link jQuery.typedValue.valid} will work with them. 151 | * @see jQuery.typedValue 152 | * @see jQuery.typedValue.valid 153 | */ 154 | $.typedValue.types = {}; 155 | 156 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#string'] = { 157 | regex: /^.*$/, 158 | strip: false, 159 | /** @ignore */ 160 | value: function (v) { 161 | return v; 162 | } 163 | }; 164 | 165 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#token'] = { 166 | regex: /^.*$/, 167 | strip: true, 168 | /** @ignore */ 169 | value: function (v) { 170 | return strip(v); 171 | } 172 | }; 173 | 174 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#NCName'] = { 175 | regex: /^[a-z_][-\.a-z0-9]+$/i, 176 | strip: true, 177 | /** @ignore */ 178 | value: function (v) { 179 | return strip(v); 180 | } 181 | }; 182 | 183 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#boolean'] = { 184 | regex: /^(?:true|false|1|0)$/, 185 | strip: true, 186 | /** @ignore */ 187 | value: function (v) { 188 | return v === 'true' || v === '1'; 189 | } 190 | }; 191 | 192 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#decimal'] = { 193 | regex: /^[\-\+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)$/, 194 | strip: true, 195 | /** @ignore */ 196 | value: function (v) { 197 | v = v.replace(/^0+/, '') 198 | .replace(/0+$/, ''); 199 | if (v === '') { 200 | v = '0.0'; 201 | } 202 | if (v.substring(0, 1) === '.') { 203 | v = '0' + v; 204 | } 205 | if (/\.$/.test(v)) { 206 | v = v + '0'; 207 | } else if (!/\./.test(v)) { 208 | v = v + '.0'; 209 | } 210 | return v; 211 | } 212 | }; 213 | 214 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#integer'] = { 215 | regex: /^[\-\+]?[0-9]+$/, 216 | strip: true, 217 | /** @ignore */ 218 | value: function (v) { 219 | return parseInt(v, 10); 220 | } 221 | }; 222 | 223 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#int'] = { 224 | regex: /^[\-\+]?[0-9]+$/, 225 | 
strip: true, 226 | /** @ignore */ 227 | value: function (v) { 228 | return parseInt(v, 10); 229 | } 230 | }; 231 | 232 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#float'] = { 233 | regex: /^(?:[\-\+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:[eE][\-\+]?[0-9]+)?|[\-\+]?INF|NaN)$/, 234 | strip: true, 235 | /** @ignore */ 236 | value: function (v) { 237 | if (v === '-INF') { 238 | return -1 / 0; 239 | } else if (v === 'INF' || v === '+INF') { 240 | return 1 / 0; 241 | } else { 242 | return parseFloat(v); 243 | } 244 | } 245 | }; 246 | 247 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#double'] = { 248 | regex: $.typedValue.types['http://www.w3.org/2001/XMLSchema#float'].regex, 249 | strip: true, 250 | value: $.typedValue.types['http://www.w3.org/2001/XMLSchema#float'].value 251 | }; 252 | 253 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#duration'] = { 254 | regex: /^([\-\+])?P(?:([0-9]+)Y)?(?:([0-9]+)M)?(?:([0-9]+)D)?(?:T(?:([0-9]+)H)?(?:([0-9]+)M)?(?:([0-9]+(?:\.[0-9]+)?)?S)?)$/, 255 | /** @ignore */ 256 | validate: function (v) { 257 | var m = this.regex.exec(v); 258 | return m[2] || m[3] || m[4] || m[5] || m[6] || m[7]; 259 | }, 260 | strip: true, 261 | /** @ignore */ 262 | value: function (v) { 263 | return v; 264 | } 265 | }; 266 | 267 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#yearMonthDuration'] = { 268 | regex: /^([\-\+])?P(?:([0-9]+)Y)?(?:([0-9]+)M)?$/, 269 | /** @ignore */ 270 | validate: function (v) { 271 | var m = this.regex.exec(v); 272 | return m[2] || m[3]; 273 | }, 274 | strip: true, 275 | /** @ignore */ 276 | value: function (v) { 277 | var m = this.regex.exec(v), 278 | years = m[2] || 0, 279 | months = m[3] || 0; 280 | months += years * 12; 281 | return m[1] === '-' ? 
-1 * months : months; 282 | } 283 | }; 284 | 285 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#dateTime'] = { 286 | regex: /^(-?[0-9]{4,})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):(([0-9]{2})(\.([0-9]+))?)((?:[\-\+]([0-9]{2}):([0-9]{2}))|Z)?$/, 287 | /** @ignore */ 288 | validate: function (v) { 289 | var 290 | m = this.regex.exec(v), 291 | year = parseInt(m[1], 10), 292 | tz = m[10] === undefined || m[10] === 'Z' ? '+0000' : m[10].replace(/:/, ''), 293 | date; 294 | if (year === 0 || 295 | parseInt(tz, 10) < -1400 || parseInt(tz, 10) > 1400) { 296 | return false; 297 | } 298 | try { 299 | year = year < 100 ? Math.abs(year) + 1000 : year; 300 | month = parseInt(m[2], 10); 301 | day = parseInt(m[3], 10); 302 | if (day > 31) { 303 | return false; 304 | } else if (day > 30 && !(month === 1 || month === 3 || month === 5 || month === 7 || month === 8 || month === 10 || month === 12)) { 305 | return false; 306 | } else if (month === 2) { 307 | if (day > 29) { 308 | return false; 309 | } else if (day === 29 && (year % 4 !== 0 || (year % 100 === 0 && year % 400 !== 0))) { 310 | return false; 311 | } 312 | } 313 | date = '' + year + '/' + m[2] + '/' + m[3] + ' ' + m[4] + ':' + m[5] + ':' + m[7] + ' ' + tz; 314 | date = new Date(date); 315 | return true; 316 | } catch (e) { 317 | return false; 318 | } 319 | }, 320 | strip: true, 321 | /** @ignore */ 322 | value: function (v) { 323 | return v; 324 | } 325 | }; 326 | 327 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#date'] = { 328 | regex: /^(-?[0-9]{4,})-([0-9]{2})-([0-9]{2})((?:[\-\+]([0-9]{2}):([0-9]{2}))|Z)?$/, 329 | /** @ignore */ 330 | validate: function (v) { 331 | var 332 | m = this.regex.exec(v), 333 | year = parseInt(m[1], 10), 334 | month = parseInt(m[2], 10), 335 | day = parseInt(m[3], 10), 336 | tz = m[10] === undefined || m[10] === 'Z' ? 
'+0000' : m[10].replace(/:/, ''); 337 | if (year === 0 || 338 | month > 12 || 339 | day > 31 || 340 | parseInt(tz, 10) < -1400 || parseInt(tz, 10) > 1400) { 341 | return false; 342 | } else { 343 | return true; 344 | } 345 | }, 346 | strip: true, 347 | /** @ignore */ 348 | value: function (v) { 349 | return v; 350 | } 351 | }; 352 | 353 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#gYear'] = { 354 | regex: /^-?([0-9]{4,})$/, 355 | /** @ignore */ 356 | validate: function (v) { 357 | var i = parseInt(v, 10); 358 | return i !== 0; 359 | }, 360 | strip: true, 361 | /** @ignore */ 362 | value: function (v) { 363 | return parseInt(v, 10); 364 | } 365 | }; 366 | 367 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#gMonthDay'] = { 368 | regex: /^--([0-9]{2})-([0-9]{2})((?:[\-\+]([0-9]{2}):([0-9]{2}))|Z)?$/, 369 | /** @ignore */ 370 | validate: function (v) { 371 | var 372 | m = this.regex.exec(v), 373 | month = parseInt(m[1], 10), 374 | day = parseInt(m[2], 10), 375 | tz = m[3] === undefined || m[3] === 'Z' ? '+0000' : m[3].replace(/:/, ''); 376 | if (month > 12 || 377 | day > 31 || 378 | parseInt(tz, 10) < -1400 || parseInt(tz, 10) > 1400) { 379 | return false; 380 | } else if (month === 2 && day > 29) { 381 | return false; 382 | } else if ((month === 4 || month === 6 || month === 9 || month === 11) && day > 30) { 383 | return false; 384 | } else { 385 | return true; 386 | } 387 | }, 388 | strip: true, 389 | /** @ignore */ 390 | value: function (v) { 391 | return v; 392 | } 393 | }; 394 | 395 | $.typedValue.types['http://www.w3.org/2001/XMLSchema#anyURI'] = { 396 | regex: /^.*$/, 397 | strip: true, 398 | /** @ignore */ 399 | value: function (v, options) { 400 | var opts = $.extend({}, $.typedValue.defaults, options); 401 | return $.uri.resolve(v, opts.base); 402 | } 403 | }; 404 | 405 | $.typedValue.defaults = { 406 | base: $.uri.base(), 407 | namespaces: {} 408 | }; 409 | 410 | /** 411 | * Checks whether a value is valid according to a given datatype. 
The datatype must be held in the {@link jQuery.typedValue.types} object. 412 | * @param {String} value The value to validate. 413 | * @param {String} datatype The URI for the datatype against which the value will be validated. 414 | * @returns {boolean} True if the value is valid or the datatype is not recognised. 415 | * @example validDate = $.typedValue.valid(date, 'http://www.w3.org/2001/XMLSchema#date'); 416 | */ 417 | $.typedValue.valid = function (value, datatype) { 418 | var d = $.typedValue.types[datatype]; 419 | if (d === undefined) { 420 | return true; 421 | } else { 422 | value = d.strip ? strip(value) : value; 423 | if (d.regex.test(value)) { 424 | return d.validate === undefined ? true : d.validate(value); 425 | } else { 426 | return false; 427 | } 428 | } 429 | }; 430 | 431 | })(jQuery); 432 | -------------------------------------------------------------------------------- /thirdparty/jquery.rules.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery RDF Rules @VERSION 3 | * 4 | * Copyright (c) 2008 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | * Depends: 8 | * jquery.uri.js 9 | * jquery.xmlns.js 10 | * jquery.datatype.js 11 | * jquery.curie.js 12 | * jquery.rdf.js 13 | */ 14 | /** 15 | * @fileOverview jQuery RDF Rules 16 | * @author Jeni Tennison 17 | * @copyright (c) 2008,2009 Jeni Tennison 18 | * @license MIT license (MIT-LICENSE.txt) 19 | * @version 1.0 20 | */ 21 | /** 22 | * @exports $ as jQuery 23 | */ 24 | /** 25 | * @ignore 26 | */ 27 | (function ($) { 28 | 29 | var 30 | blankNodeNum = 1; 31 | 32 | /** 33 | *

Creates a new jQuery.rdf.ruleset object. This should be invoked as a method rather than constructed using new.

34 | * @class A jQuery.rdf.ruleset object represents a set of {@link jQuery.rdf.rule}s that can be run over a databank. 35 | * @param {jQuery.rdf.rule[]} [rules=[]] An array of rules with which the ruleset is initialised. 36 | * @param {Object} [options] Initialisation options for the ruleset. 37 | * @param {Object} [options.namespaces] An object representing a set of namespace bindings which are stored and used whenever a CURIE is used within a rule. 38 | * @param {String|jQuery.uri} [options.base] The base URI used to interpret any relative URIs used within the rules. 39 | * @returns {jQuery.rdf.ruleset} 40 | * @example rules = jQuery.rdf.ruleset(); 41 | * @see jQuery.rdf.rule 42 | */ 43 | $.rdf.ruleset = function (rules, options) { 44 | return new $.rdf.ruleset.fn.init(rules, options); 45 | }; 46 | 47 | $.rdf.ruleset.fn = $.rdf.ruleset.prototype = { 48 | init: function (rules, options) { 49 | var i, 50 | opts = $.extend({}, $.rdf.ruleset.defaults, options); 51 | rules = rules || []; 52 | this.baseURI = opts.base; 53 | this.namespaces = $.extend({}, opts.namespaces); 54 | this.rules = []; 55 | for (i = 0; i < rules.length; i += 1) { 56 | this.add.apply(this, rules[i]); 57 | } 58 | return this; 59 | }, 60 | 61 | /** 62 | * Sets or returns the base URI of the {@link jQuery.rdf.ruleset}. 63 | * @param {String|jQuery.uri} [base] 64 | * @returns A {@link jQuery.uri} if no base URI is specified, otherwise returns this {@link jQuery.rdf.ruleset} object. 65 | * @example 66 | * rules = $.rdf.ruleset() 67 | * .base('http://www.example.org/'); 68 | */ 69 | base: function (uri) { 70 | if (uri === undefined) { 71 | return this.baseURI; 72 | } else { 73 | this.baseURI = uri; 74 | return this; 75 | } 76 | }, 77 | 78 | /** 79 | * Sets or returns a namespace binding on the {@link jQuery.rdf.ruleset}. 
80 | * @param {String} [prefix] 81 | * @param {String} [namespace] 82 | * @returns {Object|jQuery.uri|jQuery.rdf} If no prefix or namespace is specified, returns an object providing all namespace bindings on the {@link jQuery.rdf.ruleset}. If a prefix is specified without a namespace, returns the {@link jQuery.uri} associated with that prefix. Otherwise returns this {@link jQuery.rdf} object after setting the namespace binding. 83 | */ 84 | prefix: function (prefix, uri) { 85 | if (prefix === undefined) { 86 | return this.namespaces; 87 | } else if (uri === undefined) { 88 | return this.namespaces[prefix]; 89 | } else { 90 | this.namespaces[prefix] = uri; 91 | return this; 92 | } 93 | }, 94 | 95 | /** 96 | * Returns the number of rules in this ruleset. 97 | * @returns {Integer} 98 | */ 99 | size: function () { 100 | return this.rules.length; 101 | }, 102 | 103 | /** 104 | * Adds a rule or set of rules to this ruleset. 105 | * @param {String|Array|Function|jQuery.rdf.pattern|jQuery.rdf.rule|jQuery.rdf.ruleset} lhs A {@link jQuery.rdf.rule} will be added directly. If a {@link jQuery.rdf.ruleset} is provided then all its rules will be added to this one. Otherwise, specifies the left hand side of the rule to be added, as in {@link jQuery.rdf.rule}. 106 | * @param {String|Function|jQuery.rdf.pattern} rhs The right hand side of the rule to be added. 
107 | * @returns {jQuery.rdf.ruleset} Returns this {@link jQuery.rdf.ruleset} 108 | * @see jQuery.rdf.rule 109 | * @example 110 | * rules = $.rdf.ruleset() 111 | * .prefix('foaf', ns.foaf) 112 | * .add('?person a foaf:Person', '?person a foaf:Agent'); 113 | */ 114 | add: function (lhs, rhs) { 115 | var rule; 116 | if (rhs === undefined && lhs.rules) { 117 | this.rules = this.rules.concat(lhs.rules); 118 | } else { 119 | if (rhs === undefined && lhs.lhs) { 120 | rule = lhs; 121 | } else { 122 | rule = $.rdf.rule(lhs, rhs, { namespaces: this.prefix(), base: this.base() }); 123 | } 124 | if ($.inArray(rule, this.rules) === -1) { 125 | this.rules.push(rule); 126 | } 127 | } 128 | return this; 129 | }, 130 | 131 | /** 132 | * Runs the rules held in this ruleset on the data passed as the first argument. 133 | * @param {jQuery.rdf.databank} data A databank containing data to be reasoned over and added to. 134 | * @param {Object} [options] 135 | * @param {Integer} [options.limit=50] The rules in this ruleset are generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the ruleset will be run before halting. 136 | * @returns {jQuery.rdf.ruleset} Returns this ruleset. 
137 | * @example 138 | * rules = $.rdf.ruleset() 139 | * .prefix('foaf', ns.foaf) 140 | * .add('?person a foaf:Person', '?person a foaf:Agent') 141 | * .run(data); 142 | * @see jQuery.rdf#reason 143 | * @see jQuery.rdf.databank#reason 144 | */ 145 | run: function (data, options) { 146 | var i, r, ntriples, 147 | opts = $.extend({ limit: 50 }, options), 148 | limit = opts.limit; 149 | do { 150 | ntriples = data.size(); 151 | for (i = 0; i < this.rules.length; i += 1) { 152 | r = this.rules[i]; 153 | r.run(data); 154 | } 155 | limit -= 1; 156 | } while (data.size() > ntriples && limit > 0); 157 | return this; 158 | } 159 | }; 160 | 161 | $.rdf.ruleset.fn.init.prototype = $.rdf.ruleset.fn; 162 | 163 | $.rdf.ruleset.defaults = { 164 | base: $.uri.base(), 165 | namespaces: {} 166 | }; 167 | 168 | /* Rules */ 169 | 170 | /** 171 | *

Creates a new jQuery.rdf.rule object. This should be invoked as a method rather than constructed using new.

172 | * @class A jQuery.rdf.rule object represents a rule that can be run over a {@link jQuery.rdf.databank}. 173 | * @param {Object[]} lhs The left-hand side of the rule. This can be an array containing multiple conditions, or a single condition on its own. Each condition is one of: 174 | *
    175 | *
  • A {@link jQuery.rdf.pattern} or a string which is interpreted as a {@link jQuery.rdf.pattern}, which is used to match triples as with the {@link jQuery.rdf#where} method.
  • 176 | *
  • A function which must return true for the rule to be satisfied. The arguments for the function are as described in the documentation for {@link jQuery.rdf#filter}.
  • 177 | *
  • An array of two items: a variable name and either a regular expression or a value that it matches against (as used in the two arguments to {@link jQuery.rdf#filter}).
  • 178 | *
179 | * @param {Function|String[]} rhs The right-hand side of the rule. This can be an array of strings which are interpreted as patterns and used to create new triples when the rule is fired. If the patterns contain references to blank nodes, new blank nodes are created each time the rule is fired. Alternatively, it can be a function which is executed when the rule is fired. The function needs to have the same signature as that used for {@link jQuery.rdf#map}. 180 | * @param {Object} [options] Initialisation options for the rules. 181 | * @param {Object} [options.namespaces] An object representing a set of namespace bindings which are stored and used whenever a CURIE is used within the left or right-hand sides of the rule. 182 | * @param {String|jQuery.uri} [options.base] The base URI used to interpret any relative URIs used within the rule. 183 | * @returns {jQuery.rdf.rule} 184 | * @example $.rdf.rule('?person a foaf:Person', '?person a foaf:Agent', { namespaces: ns }); 185 | * @example 186 | * var rule = $.rdf.rule( 187 | * ['?person a vcard:VCard', 188 | * '?person vcard:fn ?name'], 189 | * ['?person a foaf:Person', 190 | * '?person foaf:name ?name'], 191 | * { namespaces: ns } 192 | * ); 193 | * @example 194 | * var rule = $.rdf.rule( 195 | * ['?person a foaf:Person', 196 | * '?person foaf:firstName ?fn'], 197 | * ['?person a vcard:VCard', 198 | * '?person vcard:n _:name', 199 | * '_:name a vcard:Name', 200 | * '_:name vcard:given-name ?fn'], 201 | * { namespaces: ns } 202 | * ); 203 | * @example 204 | * var rule = $.rdf.rule( 205 | * ['?person foaf:name ?name', 206 | * ['name', /^J.+/]], 207 | * function () { name = this.name }, 208 | * { namespaces: ns }); 209 | * @see jQuery.rdf.rule 210 | */ 211 | $.rdf.rule = function (lhs, rhs, options) { 212 | return new $.rdf.rule.fn.init(lhs, rhs, options); 213 | }; 214 | 215 | $.rdf.rule.fn = $.rdf.rule.prototype = { 216 | init: function (lhs, rhs, options) { 217 | var opts = $.extend({}, $.rdf.rule.defaults, 
options), 218 | lhsWildcards = [], rhsBlanks = false; 219 | if (typeof lhs === 'string') { 220 | lhs = [lhs]; 221 | } 222 | if (typeof rhs === 'string') { 223 | rhs = [rhs]; 224 | } 225 | this.lhs = $.map(lhs, function (p) { 226 | if ($.isArray(p)) { 227 | return [p]; 228 | } else if ($.isFunction(p)) { 229 | return p; 230 | } else { 231 | p = $.rdf.pattern(p, opts); 232 | if (typeof p.subject === 'string') { 233 | lhsWildcards.push(p.subject); 234 | } 235 | if (typeof p.property === 'string') { 236 | lhsWildcards.push(p.property); 237 | } 238 | if (typeof p.object === 'string') { 239 | lhsWildcards.push(p.object); 240 | } 241 | return p; 242 | } 243 | }); 244 | lhsWildcards = $.unique(lhsWildcards); 245 | if ($.isFunction(rhs)) { 246 | this.rhs = rhs; 247 | } else { 248 | this.rhs = $.map(rhs, function (p) { 249 | p = $.rdf.pattern(p, opts); 250 | if ((typeof p.subject === 'string' && $.inArray(p.subject, lhsWildcards) === -1) || 251 | (typeof p.property === 'string' && $.inArray(p.property, lhsWildcards) === -1) || 252 | (typeof p.object === 'string' && $.inArray(p.object, lhsWildcards) === -1)) { 253 | throw "Bad Rule: Right-hand side of the rule contains a reference to an unbound wildcard"; 254 | } else if (p.subject.type === 'bnode' || p.property.type === 'bnode' || p.object.type === 'bnode') { 255 | rhsBlanks = true; 256 | } 257 | return p; 258 | }); 259 | } 260 | this.rhsBlanks = rhsBlanks; 261 | this.cache = {}; 262 | return this; 263 | }, 264 | 265 | /** 266 | * Runs the rule on the data passed as the first argument. 267 | * @param {jQuery.rdf.databank} data A databank containing data to be reasoned over and added to. 268 | * @param {Object} [options] 269 | * @param {Integer} [options.limit=50] The rule isArray generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. 
The limit option indicates the maximum number of times the rule will be run before halting. 270 | * @returns {jQuery.rdf.rule} Returns this rule. 271 | * @example 272 | * $.rdf.rule('?person a foaf:Person', '?person a foaf:Agent', { namespaces: ns }) 273 | * .run(data); 274 | * @see jQuery.rdf.ruleset#run 275 | * @see jQuery.rdf#reason 276 | * @see jQuery.rdf.databank#reason 277 | */ 278 | run: function (data, options) { 279 | var query = $.rdf({ databank: data }), 280 | condition, 281 | opts = $.extend({ limit: 50 }, options), limit = opts.limit, 282 | ntriples, 283 | i, j, pattern, s, p, o, q, 284 | blanks = this.rhsBlanks, 285 | cache, sources, triples, add; 286 | if (this.cache[data.id] === undefined) { 287 | this.cache[data.id] = {}; 288 | } 289 | for (i = 0; i < this.lhs.length; i += 1) { 290 | condition = this.lhs[i]; 291 | if ($.isArray(condition)) { 292 | query = query.filter.apply(query, condition); 293 | } else if ($.isFunction(condition)) { 294 | query = query.filter.call(query, condition); 295 | } else { 296 | query = query.where(this.lhs[i]); 297 | } 298 | } 299 | do { 300 | ntriples = query.length; 301 | sources = query.sources(); 302 | for (i = 0; i < ntriples; i += 1) { 303 | triples = sources[i]; 304 | add = true; 305 | cache = this.cache[data.id]; 306 | for (j = 0; j < triples.length; j += 1) { 307 | if (cache[triples[j]] === undefined) { 308 | cache[triples[j]] = {}; 309 | } else if (j === triples.length - 1) { 310 | add = false; 311 | } 312 | cache = cache[triples[j]]; 313 | } 314 | if (add) { 315 | q = query.eq(i); 316 | if (blanks) { 317 | for (j = 0; j < this.rhs.length; j += 1) { 318 | pattern = this.rhs[j]; 319 | s = pattern.subject; 320 | p = pattern.property; 321 | o = pattern.object; 322 | if (s.type === 'bnode') { 323 | s = $.rdf.blank('' + s + blankNodeNum); 324 | } 325 | if (p.type === 'bnode') { 326 | p = $.rdf.blank('' + p + blankNodeNum); 327 | } 328 | if (o.type === 'bnode') { 329 | o = $.rdf.blank('' + o + blankNodeNum); 330 | } 
331 | pattern = $.rdf.pattern(s, p, o); 332 | q.add(pattern); 333 | } 334 | blankNodeNum += 1; 335 | } else if ($.isFunction(this.rhs)) { 336 | q.map(this.rhs); 337 | } else { 338 | for (j = 0; j < this.rhs.length; j += 1) { 339 | q.add(this.rhs[j]); 340 | } 341 | } 342 | } 343 | } 344 | limit -= 1; 345 | } while (query.length > ntriples && limit > 0); 346 | return this; 347 | } 348 | }; 349 | 350 | $.rdf.rule.fn.init.prototype = $.rdf.rule.fn; 351 | 352 | $.rdf.rule.defaults = { 353 | base: $.uri.base(), 354 | namespaces: {} 355 | }; 356 | 357 | $.extend($.rdf.databank.fn, { 358 | /** 359 | * @methodOf jQuery.rdf.databank# 360 | * @name jQuery.rdf.databank#reason 361 | * @description Reasons over this databank using the {@link jQuery.rdf.rule} or {@link jQuery.rdf.ruleset} given as the first argument. 362 | * @param {jQuery.rdf.rule|jQuery.rdf.ruleset} rules The rules to run over the databank. 363 | * @param {Object} [options] 364 | * @param {Integer} [options.limit=50] The rules in this ruleset are generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the ruleset will be run before halting. 365 | * @returns {jQuery.rdf.databank} The original {@link jQuery.rdf.databank}, although it may now contain more triples. 366 | * @see jQuery.rdf.ruleset#run 367 | * @see jQuery.rdf.rule#run 368 | */ 369 | reason: function (rule, options) { 370 | rule.run(this, options); 371 | return this; 372 | } 373 | }); 374 | 375 | $.extend($.rdf.fn, { 376 | /** 377 | * @methodOf jQuery.rdf# 378 | * @name jQuery.rdf#reason 379 | * @description Reasons over the {@link jQuery.rdf#databank} associated with this {@link jQuery.rdf} object using the {@link jQuery.rdf.rule} or {@link jQuery.rdf.ruleset} given as the first argument. 380 | * @param {jQuery.rdf.rule|jQuery.rdf.ruleset} rules The rules to run over the databank. 
381 | * @param {Object} [options] 382 | * @param {Integer} [options.limit=50] The rules in this ruleset are generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the ruleset will be run before halting. 383 | * @returns {jQuery.rdf} The original {@link jQuery.rdf} object, although it may now contain more matches because of the new triples added to its underlying databank. 384 | * @see jQuery.rdf.ruleset#run 385 | * @see jQuery.rdf.rule#run 386 | */ 387 | reason: function (rule, options) { 388 | rule.run(this.databank, options); 389 | return this; 390 | } 391 | }); 392 | 393 | })(jQuery); 394 | -------------------------------------------------------------------------------- /thirdparty/jquery.rdf.xml.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery RDF @VERSION 3 | * 4 | * Copyright (c) 2008,2009 Jeni Tennison 5 | * Licensed under the MIT (MIT-LICENSE.txt) 6 | * 7 | * Depends: 8 | * jquery.uri.js 9 | * jquery.xmlns.js 10 | * jquery.datatype.js 11 | * jquery.curie.js 12 | * jquery.rdf.js 13 | * jquery.rdf.json.js 14 | * jquery.rdf.xml.js 15 | */ 16 | /** 17 | * @fileOverview jQuery RDF/XML parser 18 | * @author Jeni Tennison 19 | * @copyright (c) 2008,2009 Jeni Tennison 20 | * @license MIT license (MIT-LICENSE.txt) 21 | * @version 1.0 22 | */ 23 | /** 24 | * @exports $ as jQuery 25 | */ 26 | /** 27 | * @ignore 28 | */ 29 | (function ($) { 30 | var 31 | rdfNs = "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 32 | 33 | addAttribute = function (parent, namespace, name, value) { 34 | var doc = parent.ownerDocument, 35 | a; 36 | if (namespace !== undefined && namespace !== null) { 37 | if (doc.createAttributeNS) { 38 | a = doc.createAttributeNS(namespace, name); 39 | a.nodeValue = value; 40 | parent.attributes.setNamedItemNS(a); 41 | } else { 42 | a = 
doc.createNode(2, name, namespace); 43 | a.nodeValue = value; 44 | parent.attributes.setNamedItem(a); 45 | } 46 | } else { 47 | a = doc.createAttribute(name); 48 | a.nodeValue = value; 49 | parent.attributes.setNamedItem(a); 50 | } 51 | return parent; 52 | }, 53 | 54 | createXmlnsAtt = function (parent, namespace, prefix) { 55 | if (namespace === 'http://www.w3.org/XML/1998/namespace' || namespace === 'http://www.w3.org/2000/xmlns/') { 56 | } else if (prefix) { 57 | addAttribute(parent, 'http://www.w3.org/2000/xmlns/', 'xmlns:' + prefix, namespace); 58 | } else { 59 | addAttribute(parent, undefined, 'xmlns', namespace); 60 | } 61 | return parent; 62 | }, 63 | 64 | createDocument = function (namespace, name) { 65 | var doc, xmlns = '', prefix, addAttribute = false; 66 | if (namespace !== undefined && namespace !== null) { 67 | if (/:/.test(name)) { 68 | prefix = /([^:]+):/.exec(name)[1]; 69 | } 70 | addAttribute = true; 71 | } 72 | if (document.implementation && 73 | document.implementation.createDocument) { 74 | doc = document.implementation.createDocument(namespace, name, null); 75 | if (addAttribute) { 76 | createXmlnsAtt(doc.documentElement, namespace, prefix); 77 | } 78 | return doc; 79 | } else { 80 | doc = new ActiveXObject("Microsoft.XMLDOM"); 81 | doc.async = "false"; 82 | if (prefix === undefined) { 83 | xmlns = ' xmlns="' + namespace + '"'; 84 | } else { 85 | xmlns = ' xmlns:' + prefix + '="' + namespace + '"'; 86 | } 87 | doc.loadXML('<' + name + xmlns + '/>'); 88 | return doc; 89 | } 90 | }, 91 | 92 | appendElement = function (parent, namespace, name, indent) { 93 | var doc = parent.ownerDocument, 94 | e; 95 | if (namespace !== undefined && namespace !== null) { 96 | e = doc.createElementNS ? 
doc.createElementNS(namespace, name) : doc.createNode(1, name, namespace); 97 | } else { 98 | e = doc.createElement(name); 99 | } 100 | if (indent !== -1) { 101 | appendText(parent, '\n'); 102 | if (indent === 0) { 103 | appendText(parent, '\n'); 104 | } else { 105 | appendText(parent, ' '); 106 | } 107 | } 108 | parent.appendChild(e); 109 | return e; 110 | }, 111 | 112 | appendText = function (parent, text) { 113 | var doc = parent.ownerDocument, 114 | t; 115 | t = doc.createTextNode(text); 116 | parent.appendChild(t); 117 | return parent; 118 | }, 119 | 120 | appendXML = function (parent, xml) { 121 | var parser, doc, i, child; 122 | try { 123 | doc = new ActiveXObject('Microsoft.XMLDOM'); 124 | doc.async = "false"; 125 | doc.loadXML('' + xml + ''); 126 | } catch(e) { 127 | parser = new DOMParser(); 128 | doc = parser.parseFromString('' + xml + '', 'text/xml'); 129 | } 130 | for (i = 0; i < doc.documentElement.childNodes.length; i += 1) { 131 | parent.appendChild(doc.documentElement.childNodes[i].cloneNode(true)); 132 | } 133 | return parent; 134 | }, 135 | 136 | createRdfXml = function (triples, options) { 137 | var doc = createDocument(rdfNs, 'rdf:RDF'), 138 | dump = $.rdf.parsers['application/json'].dump(triples), 139 | namespaces = options.namespaces || {}, 140 | indent = options.indent || false, 141 | n, s, se, p, pe, i, v, 142 | m, local, ns, prefix; 143 | for (n in namespaces) { 144 | createXmlnsAtt(doc.documentElement, namespaces[n], n); 145 | } 146 | for (s in dump) { 147 | if (dump[s][$.rdf.type.value] !== undefined) { 148 | m = /(.+[#\/])([^#\/]+)/.exec(dump[s][$.rdf.type.value][0].value); 149 | ns = m[1]; 150 | local = m[2]; 151 | for (n in namespaces) { 152 | if (namespaces[n].toString() === ns) { 153 | prefix = n; 154 | break; 155 | } 156 | } 157 | se = appendElement(doc.documentElement, ns, prefix + ':' + local, indent ? 0 : -1); 158 | } else { 159 | se = appendElement(doc.documentElement, rdfNs, 'rdf:Description', indent ? 
0 : -1); 160 | } 161 | if (/^_:/.test(s)) { 162 | addAttribute(se, rdfNs, 'rdf:nodeID', s.substring(2)); 163 | } else { 164 | addAttribute(se, rdfNs, 'rdf:about', s); 165 | } 166 | for (p in dump[s]) { 167 | if (p !== $.rdf.type.value.toString() || dump[s][p].length > 1) { 168 | m = /(.+[#\/])([^#\/]+)/.exec(p); 169 | ns = m[1]; 170 | local = m[2]; 171 | for (n in namespaces) { 172 | if (namespaces[n].toString() === ns) { 173 | prefix = n; 174 | break; 175 | } 176 | } 177 | for (i = (p === $.rdf.type.value.toString() ? 1 : 0); i < dump[s][p].length; i += 1) { 178 | v = dump[s][p][i]; 179 | pe = appendElement(se, ns, prefix + ':' + local, indent ? 1 : -1); 180 | if (v.type === 'uri') { 181 | addAttribute(pe, rdfNs, 'rdf:resource', v.value); 182 | } else if (v.type === 'literal') { 183 | if (v.datatype !== undefined) { 184 | if (v.datatype === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral') { 185 | addAttribute(pe, rdfNs, 'rdf:parseType', 'Literal'); 186 | if (indent) { 187 | appendText(pe, '\n '); 188 | } 189 | appendXML(pe, v.value); 190 | if (indent) { 191 | appendText(pe, '\n '); 192 | } 193 | } else { 194 | addAttribute(pe, rdfNs, 'rdf:datatype', v.datatype); 195 | appendText(pe, v.value); 196 | } 197 | } else if (v.lang !== undefined) { 198 | addAttribute(pe, 'http://www.w3.org/XML/1998/namespace', 'xml:lang', v.lang); 199 | appendText(pe, v.value); 200 | } else { 201 | appendText(pe, v.value); 202 | } 203 | } else { 204 | // blank node 205 | addAttribute(pe, rdfNs, 'rdf:nodeID', v.value.substring(2)); 206 | } 207 | } 208 | if (indent) { 209 | appendText(se, '\n'); 210 | } 211 | } 212 | } 213 | } 214 | if (indent) { 215 | appendText(doc.documentElement, '\n\n'); 216 | } 217 | return doc; 218 | }, 219 | 220 | getDefaultNamespacePrefix = function (namespaceUri) { 221 | switch (namespaceUri) { 222 | case 'http://www.w3.org/1999/02/22-rdf-syntax-ns': 223 | return 'rdf'; 224 | case 'http://www.w3.org/XML/1998/namespace': 225 | return 'xml'; 226 | case 
'http://www.w3.org/2000/xmlns/': 227 | return 'xmlns'; 228 | default: 229 | throw ('No default prefix mapped for namespace ' + namespaceUri); 230 | } 231 | }, 232 | 233 | hasAttributeNS = function(elem, namespace, name){ 234 | var basename; 235 | if (elem.hasAttributeNS) { 236 | return elem.hasAttributeNS(namespace, name); 237 | } else { 238 | try { 239 | basename = /:/.test(name) ? /:(.+)$/.exec(name)[1] : name; 240 | return elem.attributes.getQualifiedItem(basename, namespace) !== null; 241 | } catch (e) { 242 | return elem.getAttribute(getDefaultNamespacePrefix(namespace) + ':' + name) !== null; 243 | } 244 | } 245 | }, 246 | 247 | getAttributeNS = function(elem, namespace, name){ 248 | var basename; 249 | if (elem.getAttributeNS) { 250 | return elem.getAttributeNS(namespace, name); 251 | } else { 252 | try { 253 | basename = /:/.test(name) ? /:(.+)$/.exec(name)[1] : name; 254 | return elem.attributes.getQualifiedItem(basename, namespace).nodeValue; 255 | } catch (e) { 256 | return elem.getAttribute(getDefaultNamespacePrefix(namespace) + ':' + name); 257 | } 258 | } 259 | }, 260 | 261 | getLocalName = function(elem){ 262 | return elem.localName || elem.baseName; 263 | }, 264 | 265 | parseRdfXmlSubject = function (elem, base) { 266 | var s, subject; 267 | if (hasAttributeNS(elem, rdfNs, 'about')) { 268 | s = getAttributeNS(elem, rdfNs, 'about'); 269 | subject = $.rdf.resource('<' + s + '>', { base: base }); 270 | } else if (hasAttributeNS(elem, rdfNs, 'ID')) { 271 | s = getAttributeNS(elem, rdfNs, 'ID'); 272 | subject = $.rdf.resource('<#' + s + '>', { base: base }); 273 | } else if (hasAttributeNS(elem, rdfNs, 'nodeID')) { 274 | s = getAttributeNS(elem, rdfNs, 'nodeID'); 275 | subject = $.rdf.blank('_:' + s); 276 | } else { 277 | subject = $.rdf.blank('[]'); 278 | } 279 | return subject; 280 | }, 281 | 282 | parseRdfXmlDescription = function (elem, isDescription, base, lang) { 283 | var subject, p, property, o, object, reified, lang, i, j, li = 1, 284 | 
collection1, collection2, collectionItem, collectionItems = [], 285 | parseType, serializer, literalOpts = {}, oTriples, triples = []; 286 | lang = getAttributeNS(elem, 'http://www.w3.org/XML/1998/namespace', 'lang') || lang; 287 | base = getAttributeNS(elem, 'http://www.w3.org/XML/1998/namespace', 'base') || base; 288 | if (lang !== null && lang !== undefined && lang !== '') { 289 | literalOpts = { lang: lang }; 290 | } 291 | subject = parseRdfXmlSubject(elem, base); 292 | if (isDescription && (elem.namespaceURI !== rdfNs || getLocalName(elem) !== 'Description')) { 293 | property = $.rdf.type; 294 | object = $.rdf.resource('<' + elem.namespaceURI + getLocalName(elem) + '>'); 295 | triples.push($.rdf.triple(subject, property, object)); 296 | } 297 | for (i = 0; i < elem.attributes.length; i += 1) { 298 | p = elem.attributes.item(i); 299 | if (p.namespaceURI !== undefined && 300 | p.namespaceURI !== 'http://www.w3.org/2000/xmlns/' && 301 | p.namespaceURI !== 'http://www.w3.org/XML/1998/namespace' && 302 | p.prefix !== 'xmlns' && 303 | p.prefix !== 'xml') { 304 | if (p.namespaceURI !== rdfNs) { 305 | property = $.rdf.resource('<' + p.namespaceURI + getLocalName(p) + '>'); 306 | object = $.rdf.literal(literalOpts.lang ? 
p.nodeValue : '"' + p.nodeValue.replace(/"/g, '\\"') + '"', literalOpts); 307 | triples.push($.rdf.triple(subject, property, object)); 308 | } else if (getLocalName(p) === 'type') { 309 | property = $.rdf.type; 310 | object = $.rdf.resource('<' + p.nodeValue + '>', { base: base }); 311 | triples.push($.rdf.triple(subject, property, object)); 312 | } 313 | } 314 | } 315 | var parentLang = lang; 316 | for (i = 0; i < elem.childNodes.length; i += 1) { 317 | p = elem.childNodes[i]; 318 | if (p.nodeType === 1) { 319 | if (p.namespaceURI === rdfNs && getLocalName(p) === 'li') { 320 | property = $.rdf.resource('<' + rdfNs + '_' + li + '>'); 321 | li += 1; 322 | } else { 323 | property = $.rdf.resource('<' + p.namespaceURI + getLocalName(p) + '>'); 324 | } 325 | lang = getAttributeNS(p, 'http://www.w3.org/XML/1998/namespace', 'lang') || parentLang; 326 | if (lang !== null && lang !== undefined && lang !== '') { 327 | literalOpts = { lang: lang }; 328 | } else { 329 | literalOpts = {}; 330 | } 331 | if (hasAttributeNS(p, rdfNs, 'resource')) { 332 | o = getAttributeNS(p, rdfNs, 'resource'); 333 | object = $.rdf.resource('<' + o + '>', { base: base }); 334 | } else if (hasAttributeNS(p, rdfNs, 'nodeID')) { 335 | o = getAttributeNS(p, rdfNs, 'nodeID'); 336 | object = $.rdf.blank('_:' + o); 337 | } else if (hasAttributeNS(p, rdfNs, 'parseType')) { 338 | parseType = getAttributeNS(p, rdfNs, 'parseType'); 339 | if (parseType === 'Literal') { 340 | try { 341 | serializer = new XMLSerializer(); 342 | o = serializer.serializeToString(p.getElementsByTagName('*')[0]); 343 | } catch (e) { 344 | o = ""; 345 | for (j = 0; j < p.childNodes.length; j += 1) { 346 | o += p.childNodes[j].xml; 347 | } 348 | } 349 | object = $.rdf.literal(o, { datatype: rdfNs + 'XMLLiteral' }); 350 | } else if (parseType === 'Resource') { 351 | oTriples = parseRdfXmlDescription(p, false, base, lang); 352 | if (oTriples.length > 0) { 353 | object = oTriples[oTriples.length - 1].subject; 354 | triples = 
triples.concat(oTriples); 355 | } else { 356 | object = $.rdf.blank('[]'); 357 | } 358 | } else if (parseType === 'Collection') { 359 | if (p.getElementsByTagName('*').length > 0) { 360 | for (j = 0; j < p.childNodes.length; j += 1) { 361 | o = p.childNodes[j]; 362 | if (o.nodeType === 1) { 363 | collectionItems.push(o); 364 | } 365 | } 366 | collection1 = $.rdf.blank('[]'); 367 | object = collection1; 368 | for (j = 0; j < collectionItems.length; j += 1) { 369 | o = collectionItems[j]; 370 | oTriples = parseRdfXmlDescription(o, true, base, lang); 371 | if (oTriples.length > 0) { 372 | collectionItem = oTriples[oTriples.length - 1].subject; 373 | triples = triples.concat(oTriples); 374 | } else { 375 | collectionItem = parseRdfXmlSubject(o); 376 | } 377 | triples.push($.rdf.triple(collection1, $.rdf.first, collectionItem)); 378 | if (j === collectionItems.length - 1) { 379 | triples.push($.rdf.triple(collection1, $.rdf.rest, $.rdf.nil)); 380 | } else { 381 | collection2 = $.rdf.blank('[]'); 382 | triples.push($.rdf.triple(collection1, $.rdf.rest, collection2)); 383 | collection1 = collection2; 384 | } 385 | } 386 | } else { 387 | object = $.rdf.nil; 388 | } 389 | } 390 | } else if (hasAttributeNS(p, rdfNs, 'datatype')) { 391 | o = p.childNodes[0] ? p.childNodes[0].nodeValue : ""; 392 | object = $.rdf.literal(o, { datatype: getAttributeNS(p, rdfNs, 'datatype') }); 393 | } else if (p.getElementsByTagName('*').length > 0) { 394 | for (j = 0; j < p.childNodes.length; j += 1) { 395 | o = p.childNodes[j]; 396 | if (o.nodeType === 1) { 397 | oTriples = parseRdfXmlDescription(o, true, base, lang); 398 | if (oTriples.length > 0) { 399 | object = oTriples[oTriples.length - 1].subject; 400 | triples = triples.concat(oTriples); 401 | } else { 402 | object = parseRdfXmlSubject(o); 403 | } 404 | } 405 | } 406 | } else if (p.childNodes.length > 0) { 407 | o = p.childNodes[0].nodeValue; 408 | object = $.rdf.literal(literalOpts.lang ? 
o : '"' + o.replace(/"/g, '\\"') + '"', literalOpts); 409 | } else { 410 | oTriples = parseRdfXmlDescription(p, false, base, lang); 411 | if (oTriples.length > 0) { 412 | object = oTriples[oTriples.length - 1].subject; 413 | triples = triples.concat(oTriples); 414 | } else { 415 | object = $.rdf.blank('[]'); 416 | } 417 | } 418 | triples.push($.rdf.triple(subject, property, object)); 419 | if (hasAttributeNS(p, rdfNs, 'ID')) { 420 | reified = $.rdf.resource('<#' + getAttributeNS(p, rdfNs, 'ID') + '>', { base: base }); 421 | triples.push($.rdf.triple(reified, $.rdf.subject, subject)); 422 | triples.push($.rdf.triple(reified, $.rdf.property, property)); 423 | triples.push($.rdf.triple(reified, $.rdf.object, object)); 424 | } 425 | } 426 | } 427 | return triples; 428 | }, 429 | 430 | parseRdfXml = function (doc) { 431 | var i, lang, d, triples = []; 432 | if (doc.documentElement.namespaceURI === rdfNs && getLocalName(doc.documentElement) === 'RDF') { 433 | lang = getAttributeNS(doc.documentElement, 'http://www.w3.org/XML/1998/namespace', 'lang'); 434 | base = getAttributeNS(doc.documentElement, 'http://www.w3.org/XML/1998/namespace', 'base') || $.uri.base(); 435 | triples = $.map(doc.documentElement.childNodes, function (d) { 436 | if (d.nodeType === 1) { 437 | return parseRdfXmlDescription(d, true, base, lang); 438 | } else { 439 | return null; 440 | } 441 | }); 442 | /* 443 | for (i = 0; i < doc.documentElement.childNodes.length; i += 1) { 444 | d = doc.documentElement.childNodes[i]; 445 | if (d.nodeType === 1) { 446 | triples = triples.concat(parseRdfXmlDescription(d, true, base, lang)); 447 | } 448 | } 449 | */ 450 | } else { 451 | triples = parseRdfXmlDescription(doc.documentElement, true); 452 | } 453 | return triples; 454 | }; 455 | 456 | $.rdf.parsers['application/rdf+xml'] = { 457 | parse: function (data) { 458 | var doc; 459 | try { 460 | doc = new ActiveXObject("Microsoft.XMLDOM"); 461 | doc.async = "false"; 462 | doc.loadXML(data); 463 | } catch(e) { 464 | 
var parser = new DOMParser(); 465 | doc = parser.parseFromString(data, 'text/xml'); 466 | } 467 | return doc; 468 | }, 469 | serialize: function (data) { 470 | if (data.xml) { 471 | return data.xml.replace(/\s+$/,''); 472 | } else { 473 | serializer = new XMLSerializer(); 474 | return serializer.serializeToString(data); 475 | } 476 | }, 477 | triples: parseRdfXml, 478 | dump: createRdfXml 479 | }; 480 | 481 | })(jQuery); 482 | --------------------------------------------------------------------------------