├── faq.css
├── .gitignore
├── h1-p
    ├── gene.dev.head
    ├── gene.key.head
    ├── gene.train.head
    ├── gene.train.head2
    ├── hash.js
    ├── tag.html
    ├── HashSpec.js
    ├── emission_count.html
    ├── index.html
    ├── viterbi.html
    ├── count.html
    ├── submit.py
    ├── count_freqs.py
    ├── viterbi.js
    ├── ViterbiSpec.js
    └── eval_gene_tagger.py
├── mergeupstream.sh
├── thirdparty
    ├── jasmine-standalone-1.3.1
    │   ├── .DS_Store
    │   ├── src
    │   │   ├── Song.js
    │   │   └── Player.js
    │   ├── spec
    │   │   ├── SpecHelper.js
    │   │   └── PlayerSpec.js
    │   └── lib
    │   │   └── jasmine-1.3.1
    │   │       ├── MIT.LICENSE
    │   │       └── jasmine.css
    ├── jquery.rules.rdfs.js
    ├── jquery.rdf.json.js
    ├── qunit-1.10.0.css
    ├── jquery.icndb.js
    ├── removeStopWords.js
    ├── jquery.uri.js
    ├── jquery.xmlns.js
    ├── jquery.curie.js
    ├── jquery.datatype.js
    ├── jquery.rules.js
    └── jquery.rdf.xml.js
├── initial_kb.txt
├── faq.js
├── index.html
├── tests.js
├── qunit.html
├── faq.html
├── jasmine.html
├── storageSpec.js
├── README.md
├── querySpec.js
├── storage.js
└── query.js


/faq.css:
--------------------------------------------------------------------------------
/* The element with id "history" takes the full width and height of its container. */
#history {
    height: 100%;
    width: 100%;
}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | h1-p/gene_test.p1.out
3 | h1-p/*.pdf
4 | joke.json
5 | */.DS_Store
6 | *~
7 | 


--------------------------------------------------------------------------------
/h1-p/gene.dev.head:
--------------------------------------------------------------------------------
 1 | BACKGROUND
 2 | :
 3 | Ischemic
 4 | heart
 5 | disease
 6 | is
 7 | the
 8 | primary
 9 | cause
10 | of
11 | 


--------------------------------------------------------------------------------
/mergeupstream.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Bring the local master branch up to date with upstream/master and publish
# the result to origin.
#
# Stop at the first failing command: without this, a failed `git checkout`
# would let the merge run on whatever branch happens to be checked out, and a
# failed merge would still be followed by a push.
set -euo pipefail

git fetch upstream
git checkout master
git merge upstream/master
git push origin master


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tansaku/faqbot/HEAD/thirdparty/jasmine-standalone-1.3.1/.DS_Store


--------------------------------------------------------------------------------
/h1-p/gene.key.head:
--------------------------------------------------------------------------------
 1 | BACKGROUND O
 2 | : O
 3 | Ischemic O
 4 | heart O
 5 | disease O
 6 | is O
 7 | the O
 8 | primary O
 9 | cause O
10 | of O
11 | 


--------------------------------------------------------------------------------
/h1-p/gene.train.head:
--------------------------------------------------------------------------------
 1 | Comparison O
 2 | Comparison O
 3 | with O
 4 | alkaline I-GENE
 5 | phosphatases I-GENE
 6 | and O
 7 | 5 I-GENE
 8 | - I-GENE
 9 | nucleotidase I-GENE
10 | 
11 | Pharmacologic O
12 | 


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/src/Song.js:
--------------------------------------------------------------------------------
/**
 * Song constructor for the Jasmine example suite; it sets no properties.
 */
function Song() {}

/**
 * Stub for storing whether the song is a favorite.
 *
 * @param {*} value - accepted but never read by this stub.
 * @throws {Error} always, with the message "not yet implemented".
 */
Song.prototype.persistFavoriteStatus = function(value) {
  // something complicated
  var notImplemented = new Error("not yet implemented");
  throw notImplemented;
};


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/spec/SpecHelper.js:
--------------------------------------------------------------------------------
 1 | beforeEach(function() {
 2 |   this.addMatchers({
 3 |     toBePlaying: function(expectedSong) {
 4 |       var player = this.actual;
 5 |       return player.currentlyPlayingSong === expectedSong && 
 6 |              player.isPlaying;
 7 |     }
 8 |   });
 9 | });
10 | 


--------------------------------------------------------------------------------
/initial_kb.txt:
--------------------------------------------------------------------------------
1 | # We'll use the FOAF vocabulary to represent people
2 | # http://en.wikipedia.org/wiki/FOAF_%28software%29
3 | @prefix foaf: <http://xmlns.com/foaf/0.1/> . 
4 | @prefix dc: <http://purl.org/dc/elements/1.1/> . 
5 | @prefix dct: <http://purl.org/dc/terms/> . 
6 | _:sam a foaf:Person ; foaf:name "Sam Joseph" . 
7 | _:dave a foaf:Person ; foaf:name "Dave Snowdon" .
8 | 


--------------------------------------------------------------------------------
/faq.js:
--------------------------------------------------------------------------------
 1 | $(document).ready(function () {
 2 |      $("input#sentence").keypress(function(event) {
 3 |         if (event.which == 13) {
 4 |             event.preventDefault();
 5 |             $("form#chat").submit();
 6 |         }
 7 |     });
 8 |     
 9 |     $("form#chat").submit(function () {
10 |         return handleChat($("input#sentence").val());
11 |     });
12 |     showTranscript(storage);
13 | });
14 | 


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML>
 2 | <html lang="en-US">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta http-equiv="refresh" content="1;url=faq.html">
 6 |     <script type="text/javascript">
 7 |         window.location.href = "faq.html"
 8 |     </script>
 9 |     <title>Page Redirection</title>
10 | </head>
11 | <body>
12 | <!-- Note: don't tell people to `click` the link, just tell them that it is a link -->
13 | If you are not redirected automatically, follow the <a href='faq.html'>link to the faqbot</a>
14 | </body>
15 | </html>


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/src/Player.js:
--------------------------------------------------------------------------------
 1 | function Player() {
 2 | }
 3 | Player.prototype.play = function(song) {
 4 |   this.currentlyPlayingSong = song;
 5 |   this.isPlaying = true;
 6 | };
 7 | 
 8 | Player.prototype.pause = function() {
 9 |   this.isPlaying = false;
10 | };
11 | 
12 | Player.prototype.resume = function() {
13 |   if (this.isPlaying) {
14 |     throw new Error("song is already playing");
15 |   }
16 | 
17 |   this.isPlaying = true;
18 | };
19 | 
20 | Player.prototype.makeFavorite = function() {
21 |   this.currentlyPlayingSong.persistFavoriteStatus(true);
22 | };


--------------------------------------------------------------------------------
/h1-p/gene.train.head2:
--------------------------------------------------------------------------------
 1 | Comparison O
 2 | Comparison O
 3 | with O
 4 | alkaline I-GENE
 5 | phosphatases I-GENE
 6 | and O
 7 | 5 I-GENE
 8 | - I-GENE
 9 | nucleotidase I-GENE
10 | 
11 | Pharmacologic O
12 | aspects O
13 | of O
14 | neonatal O
15 | hyperbilirubinemia O
16 | . O
17 | 
18 | When O
19 | CSF O
20 | [ O
21 | HCO3 O
22 | -] O
23 | is O
24 | shown O
25 | as O
26 | a O
27 | function O
28 | of O
29 | CSF O
30 | PCO2 O
31 | the O
32 | data O
33 | of O
34 | K O
35 | - O
36 | depleted O
37 | rats O
38 | are O
39 | no O
40 | longer O
41 | displaced O
42 | when O
43 | compared O
44 | to O
45 | controls O
46 | but O
47 | still O
48 | have O
49 | a O
50 | significantly O
51 | 


--------------------------------------------------------------------------------
/h1-p/hash.js:
--------------------------------------------------------------------------------
 1 | function Hash(starting_state, default_return){
 2 | 	this.default = 0;
 3 | 	if(default_return !== undefined){
 4 | 		this.default = default_return;
 5 | 	}
 6 | 	this.hash = {};
 7 | 	if(starting_state !== undefined){
 8 | 		this.hash = starting_state;
 9 | 	}
10 | 	
11 | 	this.get = function(array){
12 | 		var temp = this.hash;
13 | 		for(var i in array){
14 | 			if(temp[array[i]] === undefined){
15 | 				return this.default;
16 | 			}
17 | 			temp = temp[array[i]];
18 | 		}
19 | 		return temp;
20 | 	}
21 | 	this.delete = function(array){
22 | 		var temp = this.hash;
23 | 		for(var i in array){
24 | 			if(temp[array[i]] === undefined){
25 | 				return this.default;
26 | 			}
27 | 			if(i == array.length-1){
28 | 				temp[array[i]] = this.default;
29 | 			}
30 | 			temp = temp[array[i]];
31 | 		}
32 | 		return temp;		
33 | 	}
34 | 	this.set = function(array, value){
35 | 		var temp = this.hash;
36 | 		for(var i in array){
37 | 			if(temp[array[i]] === undefined){
38 | 				temp[array[i]] = {};
39 | 			}
40 | 			if(i == array.length-1){
41 | 				temp[array[i]] = value;
42 | 			}
43 | 			temp = temp[array[i]];
44 | 		}
45 | 		return temp;
46 | 	}
47 | }


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/MIT.LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2008-2011 Pivotal Labs
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/tests.js:
--------------------------------------------------------------------------------
 1 | // would like to know about grouping tests within QUnit ...
 2 | test( "a basic test example", function() {
 3 |   var value = "hello";
 4 |   equal( value, "hello", "We expect value to be hello" );
 5 | });
 6 | test(  "testing NLP", function() {  
 7 |   var sentence = "There is a game engine Unreal Engine";
 8 |   var result = query(sentence);
 9 |   equal( result, "what was that?");
10 |   // "game_engines", "Unreal Engine", {"name":"Unreal Engine","ident":"Unreal Engine"})
11 | });
12 | var sentences = [];
13 | var answers = [];
14 | sentences[0] = "There is a game engine called Unreal Engine";
15 | answers[0] = "Unreal Engine is a game engine";
16 | sentences[1] = "There is a horse called Matilda";
17 | answers[1] = "Matilda is a horse";
18 | sentences[2] = "There is a course called ML";
19 | answers[2] = "ML is a course";
20 | for (var i in sentences){
21 |   test(  "\""+sentences[i] + "\" --> \"" + answers[i]+ "\"", function() {  
22 |     var result = query(sentences[i]);
23 |     equal( result,  answers[i]);
24 |   });
25 | }
26 | test(  "testing Natural", function() { 
27 |   var result = natural.SoundEx.compare('phone', 'pone'); 
28 |   equal( result, true, "We expect value to be true" );
29 |  });


--------------------------------------------------------------------------------
/h1-p/tag.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 2 |   "http://www.w3.org/TR/html4/loose.dtd">
 3 | <html>
 4 | <head>
 5 |   <title>Count Stuff</title>
 6 | 
 7 |   <link rel="shortcut icon" type="image/png" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine_favicon.png">
 8 |   <link rel="stylesheet" type="text/css" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css">
 9 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.js"></script>
10 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine-html.js"></script>
11 | 
12 |   <!-- include source files here... -->
13 | 
14 |   <script type="text/javascript" src="../thirdparty/jquery-1.7.1.js"></script>
15 | 
16 |   <script src="hash.js"></script>
17 |   <script src="viterbi.js"></script>
18 | 
19 |   <div id="output"><br/></div>
20 |   <script type="text/javascript">
21 |     (function() {
22 |       callAjax(function(trainingData){
23 |         var result = rarify(count(trainingData),'_RARE_',5);
24 |         callAjax(function(devData){
25 |           var tagged = tag(devData, result);
26 |           var lines = tagged.split('\n');
27 |           for(var i in lines){
28 |             $("div#output").append(lines[i]+'<br/>');
29 |           }
30 |         },'gene.test');
31 |       },'gene.train');
32 |     })();
33 |   </script>
34 |  
35 | 
36 | </head>
37 | 
38 | <body>
39 | </body>
40 | </html>
41 | 


--------------------------------------------------------------------------------
/qunit.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <meta charset="utf-8">
 5 |   <title>QUnit Tests for FaqBot</title>
 6 |   <link rel="stylesheet" localhref="thirdparty/qunit-1.10.0.css" href="http://code.jquery.com/qunit/qunit-1.10.0.css">
 7 | </head>
 8 | <body>
 9 | 	<div id="qunit"></div>
10 | 	<div id="qunit-fixture"></div>  
11 | 	<script type="text/javascript" src="thirdparty/jquery-1.7.1.js"></script>
12 | 	<script type="text/javascript" src="thirdparty/jquery.uri.js"></script>
13 | 	<script type="text/javascript" src="thirdparty/jquery.xmlns.js"></script>
14 | 	<script type="text/javascript" src="thirdparty/jquery.curie.js"></script>
15 | 	<script type="text/javascript" src="thirdparty/jquery.datatype.js"></script>
16 | 	<script type="text/javascript" src="thirdparty/jquery.rdf.js"></script>
17 | 	<script type="text/javascript" src="thirdparty/jquery.rdfa.js"></script>
18 | 	<script type="text/javascript" src="thirdparty/jquery.rules.js"></script>
19 | 	<script type="text/javascript" src="thirdparty/jquery.rdf.json.js"></script>
20 | 	<script type="text/javascript" src="thirdparty/jquery.rdf.turtle.js"></script>
21 | 	<script type="text/javascript" src="thirdparty/jquery.icndb.js"></script>
22 | 	<script type="text/javascript" src="thirdparty/xregexp.js"></script>
23 | 	<script type="text/javascript" src="thirdparty/natural.js"></script>
24 | 	<script type="text/javascript" src="thirdparty/removeStopWords.js"></script> 
25 | 	<script src="storage.js"></script>
26 | 	<script src="faq.js"></script>
27 | 	<script src="thirdparty/qunit-1.10.0.js"></script>
28 | 	<script src="query.js"></script>
29 | 	<script src="tests.js" ></script>
30 | </body>
31 | </html>
32 | 


--------------------------------------------------------------------------------
/h1-p/HashSpec.js:
--------------------------------------------------------------------------------
 1 | describe("Hash", function() {
 2 | 	var DEFAULT = 0;
 3 | 
 4 | 	var hash;
 5 | 
 6 | 	beforeEach(function() {
 7 | 		hash = new Hash();
 8 | 	});
 9 | 
10 | 	describe("starting state", function () {
11 | 		it("should have starting state key/value pairs we pass in", function () {
12 | 		    hash = new Hash({'1':'a','2':'b','3':'c'}, DEFAULT);
13 | 		    expect(hash.get(['1'])).toEqual('a');
14 | 		    expect(hash.get(['2'])).toEqual('b');
15 | 		    expect(hash.get(['3'])).toEqual('c');
16 | 		    expect(hash.get(['4'])).toEqual(DEFAULT);
17 | 		});
18 | 	});
19 | 	describe("get", function () {
20 | 		it("should get the default value", function () {
21 | 		    expect(hash.get(['a'])).toEqual(DEFAULT);
22 | 		});
23 | 		it("should get the default value even when requesting nested hashes", function () {
24 | 		    expect(hash.get(['a','b'])).toEqual(DEFAULT);
25 | 		});
26 | 	});
27 | 	// TODO should check we throw an exception if get, delete or set is queried with something other than an array
28 | 	describe("delete", function () {
29 | 		it("should get the delete the appropriate value", function () {
30 | 			var value = 12;
31 | 			hash.set(['a'],value);
32 | 		    expect(hash.get(['a'])).toEqual(value);
33 | 		    hash.delete(['a']);
34 | 		    expect(hash.get(['a'])).toEqual(DEFAULT);
35 | 		});
36 | 
37 | 	});
38 | 	describe("set", function () {
39 | 		it("should get the set value", function () {
40 | 			var value = 12;
41 | 			hash.set(['a'],value);
42 | 		    expect(hash.get(['a'])).toEqual(value);
43 | 		});
44 | 		it("should get the default value even when requesting nested hashes", function () {
45 | 			var value = 12;
46 | 			hash.set(['a','b'],value);
47 | 		    expect(hash.get(['a','b'])).toEqual(value);
48 | 		});
49 | 	});
50 | });
51 | 


--------------------------------------------------------------------------------
/h1-p/emission_count.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 2 |   "http://www.w3.org/TR/html4/loose.dtd">
 3 | <html>
 4 | <head>
 5 |   <title>Count Stuff</title>
 6 | 
 7 |   <link rel="shortcut icon" type="image/png" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine_favicon.png">
 8 |   <link rel="stylesheet" type="text/css" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css">
 9 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.js"></script>
10 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine-html.js"></script>
11 | 
12 |   <!-- include source files here... -->
13 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/src/Player.js"></script>
14 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/src/Song.js"></script>
15 | 
16 |   <script type="text/javascript" src="../thirdparty/jquery-1.7.1.js"></script>
17 | 
18 |   <script src="viterbi.js"></script>
19 |   <script src="Hash.js"></script>
20 | 
21 |   <div id="output"><br/></div>
22 |   <script type="text/javascript">
23 |     (function() {
24 |       callAjax(function(data){
25 |         var result = rarify(count(data),'_RARE_',5);
26 |         var word_tags = result.word_tags;
27 |         var grams = result.grams;
28 |         for(var word in word_tags.hash){
29 |           for(var category in word_tags.get([word])){
30 |             var c = word_tags.get([word,category]);
31 |             $("div#output").append('e('+word+'|'+category+') = ('+c+'/'+grams.get(['1',category])+') = '+c/grams.get(['1',category])+' = '+emission(word,category,word_tags,grams)+'<br/>');
32 |           }
33 |         }
34 |       },'gene.train');
35 |     })();
36 |   </script>
37 |  
38 | 
39 | </head>
40 | 
41 | <body>
42 | </body>
43 | </html>
44 | 


--------------------------------------------------------------------------------
/h1-p/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 2 |   "http://www.w3.org/TR/html4/loose.dtd">
 3 | <html>
 4 | <head>
 5 |   <title>Jasmine Spec Runner</title>
 6 | 
 7 |   <link rel="shortcut icon" type="image/png" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine_favicon.png">
 8 |   <link rel="stylesheet" type="text/css" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css">
 9 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.js"></script>
10 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine-html.js"></script>
11 | 
12 |   <!-- include source files here... -->
13 |   <script type="text/javascript" src="../thirdparty/jquery-1.7.1.js"></script>
14 |   
15 | 
16 |   <script src="viterbi.js"></script>
17 |   <script src="hash.js"></script>
18 | 
19 |   <!-- include spec files here... -->
20 |   <script type="text/javascript" src="ViterbiSpec.js"></script>
21 |   <script type="text/javascript" src="HashSpec.js"></script>
22 | 
23 |   <script type="text/javascript">
24 |     (function() {
25 |       var jasmineEnv = jasmine.getEnv();
26 |       jasmineEnv.updateInterval = 1000;
27 | 
28 |       var htmlReporter = new jasmine.HtmlReporter();
29 | 
30 |       jasmineEnv.addReporter(htmlReporter);
31 | 
32 |       jasmineEnv.specFilter = function(spec) {
33 |         return htmlReporter.specFilter(spec);
34 |       };
35 | 
36 |       var currentWindowOnload = window.onload;
37 | 
38 |       window.onload = function() {
39 |         if (currentWindowOnload) {
40 |           currentWindowOnload();
41 |         }
42 |         execJasmine();
43 |       };
44 | 
45 |       function execJasmine() {
46 |         jasmineEnv.execute();
47 |       }
48 | 
49 |     })();
50 |   </script>
51 | 
52 | </head>
53 | 
54 | <body>
55 | </body>
56 | </html>
57 | 


--------------------------------------------------------------------------------
/h1-p/viterbi.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 2 |   "http://www.w3.org/TR/html4/loose.dtd">
 3 | <html>
 4 | <head>
 5 |   <title>Viterbi Algorithm</title>
 6 | 
 7 |   <link rel="shortcut icon" type="image/png" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine_favicon.png">
 8 |   <link rel="stylesheet" type="text/css" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css">
 9 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.js"></script>
10 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine-html.js"></script>
11 | 
12 |   <!-- include source files here... -->
13 | 
14 |   <script type="text/javascript" src="../thirdparty/jquery-1.7.1.js"></script>
15 | 
16 |   <script src="hash.js"></script>
17 |   <script src="viterbi.js"></script>
18 | 
19 |   <div id="output"><br/></div>
20 |   <script type="text/javascript">
21 |     (function() {
22 |       callAjax(function(trainingData){
23 |         var model = rarify(count(trainingData),'_RARE_',5);
24 |         callAjax(function(devData){
25 |           var devLines = devData.split('\n');
26 |             var sentence = "";
27 |             for(var i in devLines){
28 |             if(devLines[i] != ''){
29 |               //debugger
30 |               sentence += devLines[i] + ' ';
31 |             }
32 |             else{
33 |               //debugger
34 |               var result = viterbi(sentence, model);
35 |               var lines = sentence.split(' ');
36 |               for(var j in lines){
37 |                 $("div#output").append(lines[j]+" "+result.tag_sequence[j]+'<br/>');
38 |               }
39 |               $("div#output").append('<br/>');
40 |               sentence = "";
41 |             }
42 |           }
43 |           
44 |         },'gene.test');
45 |       },'gene.train');
46 |     })();
47 |   </script>
48 |  
49 | 
50 | </head>
51 | 
52 | <body>
53 | </body>
54 | </html>
55 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.rules.rdfs.js:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * jQuery RDF Ontology @VERSION
 3 |  * 
 4 |  * Copyright (c) 2009 Jeni Tennison
 5 |  * Licensed under the MIT (MIT-LICENSE.txt)
 6 |  *
 7 |  * Depends:
 8 |  *  jquery.uri.js
 9 |  *  jquery.xmlns.js
10 |  *  jquery.datatype.js
11 |  *  jquery.curie.js
12 |  *  jquery.rdf.js
13 |  *  jquery.rules.js
14 |  */
15 | /*global jQuery */
(function ($) {

  var
    RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#",
    // Each entry holds the first and second argument of one ruleset.add()
    // call, in registration order (presumably premise pattern(s) followed by
    // the pattern(s) they entail; confirm against jquery.rules.js).
    rulePairs = [
      ['?subject ?property ?object', '?property a rdf:Property'],
      ['?property rdfs:range ?class',
        ['?property a rdf:Property', '?class a rdfs:Class']],
      [['?property rdfs:range ?class', '?subject ?property ?object'],
        '?object a ?class'],
      ['?property rdfs:domain ?class',
        ['?property a rdf:Property', '?class a rdfs:Class']],
      [['?property rdfs:domain ?class', '?subject ?property ?object'],
        '?subject a ?class'],
      ['?instance a ?class', '?class a rdfs:Class'],
      ['?subclass rdfs:subClassOf ?class',
        ['?subclass a rdfs:Class', '?class a rdfs:Class']],
      [['?subclass rdfs:subClassOf ?class', '?instance a ?subclass'],
        '?instance a ?class'],
      ['?subproperty rdfs:subPropertyOf ?property',
        ['?subproperty a rdf:Property', '?property a rdf:Property']],
      [['?subproperty rdfs:subPropertyOf ?property', '?subject ?subproperty ?object'],
        '?subject ?property ?object'],
      ['?statement rdf:subject ?resource', '?statement a rdf:Statement'],
      ['?statement rdf:predicate ?property',
        ['?statement a rdf:Statement', '?property a rdf:Property']],
      ['?statement rdf:object ?resource', '?statement a rdf:Statement'],
      [['?statement rdf:subject ?subject', '?statement rdf:predicate ?property', '?statement rdf:object ?object'],
        '?subject ?property ?object'],
      ['?subject rdfs:isDefinedBy ?object', '?subject rdfs:seeAlso ?object']
    ],
    rules,
    i;

  rules = $.rdf.ruleset()
    .prefix('rdf', RDF_NS)
    .prefix('rdfs', RDFS_NS);

  // Keep whatever add() returns, exactly as a method chain would.
  for (i = 0; i < rulePairs.length; i += 1) {
    rules = rules.add(rulePairs[i][0], rulePairs[i][1]);
  }

  $.rdf.ruleset.rdfs = rules;

})(jQuery);
52 | 


--------------------------------------------------------------------------------
/faq.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <!-- saved from url=(0059)file://localhost/Users/sam/Documents/Github/faqbot/faq.html -->
 3 | <html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 4 |     <meta charset="utf-8">
 5 |     <title>FaqBot</title>
 6 |     <link rel="stylesheet" href="faq.css">
 7 |     <!--- <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script> -->
 8 |     <script type="text/javascript" src="thirdparty/xregexp.js"></script>
 9 |     <script type="text/javascript" src="thirdparty/jquery-1.7.1.min.js"></script>
10 | 
11 |     <script type="text/javascript" src="thirdparty/jquery.uri.js"></script>
12 |     <script type="text/javascript" src="thirdparty/jquery.xmlns.js"></script>
13 |     <script type="text/javascript" src="thirdparty/jquery.curie.js"></script>
14 |     <script type="text/javascript" src="thirdparty/jquery.datatype.js"></script>
15 |     <script type="text/javascript" src="thirdparty/jquery.rdf.js"></script>
16 |     <script type="text/javascript" src="thirdparty/jquery.rdfa.js"></script>
17 |     <script type="text/javascript" src="thirdparty/jquery.rules.js"></script>
18 |     <script type="text/javascript" src="thirdparty/jquery.rdf.json.js"></script>
19 |     <script type="text/javascript" src="thirdparty/jquery.rdf.turtle.js"></script> 
20 |     <script type="text/javascript" src="thirdparty/removeStopWords.js"></script> 
21 |     <script type="text/javascript" src="thirdparty/natural.js"></script> 
22 | 
23 |     <script type="text/javascript" src="storage.js"></script>
24 |     <script type="text/javascript" src="query.js"></script>
25 |     <script type="text/javascript" src="faq.js"></script>
26 | </head>
27 | <body>
28 |   <div>[Note, to get the bot to store knowledge say things like "There is a person called John", then you can ask questions about 'John']<br/></div>
29 |   <div>[This project is in early alpha. Check the <a href="https://github.com/tansaku/faqbot/blob/master/README.md">Documentation</a> ]<br/></div>
30 |   <br/>
31 |   <div id="history">Bot: hello<br/></div>
32 |   <form id="chat">
33 |     <input id="sentence" type="text" size='45'>      
34 |   </form>
35 |   <br/>
36 |   <iframe src="jasmine.html" width='100%' height='800'></iframe>
37 | </body>
38 | </html>
39 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.rdf.json.js:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * jQuery RDF @VERSION
 3 |  *
 4 |  * Copyright (c) 2008,2009 Jeni Tennison
 5 |  * Licensed under the MIT (MIT-LICENSE.txt)
 6 |  *
 7 |  * Depends:
 8 |  *  jquery.uri.js
 9 |  *  jquery.xmlns.js
10 |  *  jquery.datatype.js
11 |  *  jquery.curie.js
12 |  *  jquery.rdf.js
13 |  *  jquery.json.js
14 |  */
15 | /**
16 |  * @fileOverview jQuery RDF/JSON parser
17 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
18 |  * @copyright (c) 2008,2009 Jeni Tennison
19 |  * @license MIT license (MIT-LICENSE.txt)
20 |  * @version 1.0
21 |  */
22 | /**
23 |  * @exports $ as jQuery
24 |  */
25 | /**
26 |  * @ignore
27 |  */
(function ($) {

  /**
   * Builds the rdfQuery node for one object description `o`:
   * a resource for type 'uri', a blank node for type 'bnode', otherwise a
   * literal (typed when `o.datatype` is set, else quoted with optional lang).
   */
  function objectNode(o) {
    var opts;
    if (o.type === 'uri') {
      return $.rdf.resource('<' + o.value + '>');
    }
    if (o.type === 'bnode') {
      return $.rdf.blank(o.value);
    }
    // o.type === 'literal'
    if (o.datatype !== undefined) {
      return $.rdf.literal(o.value, { datatype: o.datatype });
    }
    opts = {};
    if (o.lang !== undefined) {
      opts.lang = o.lang;
    }
    return $.rdf.literal('"' + o.value + '"', opts);
  }

  /**
   * Flattens `data` (subject -> property -> array of object descriptions)
   * into an array of $.rdf.triple values. Subjects whose key starts with
   * '_:' become blank nodes, all others resources.
   */
  function toTriples(data) {
    var collected = [],
      s, p, i, subject, property, values;
    for (s in data) {
      if (s.substring(0, 2) === '_:') {
        subject = $.rdf.blank(s);
      } else {
        subject = $.rdf.resource('<' + s + '>');
      }
      for (p in data[s]) {
        property = $.rdf.resource('<' + p + '>');
        values = data[s][p];
        for (i = 0; i < values.length; i += 1) {
          collected.push($.rdf.triple(subject, property, objectNode(values[i])));
        }
      }
    }
    return collected;
  }

  /**
   * Groups triples into a subject -> property -> array structure, using each
   * triple's object.dump() as the array entry.
   */
  function dumpTriples(triples) {
    var grouped = {},
      i, triple, subjectKey, propertyKey, bySubject;
    for (i = 0; i < triples.length; i += 1) {
      triple = triples[i];
      subjectKey = triple.subject.value.toString();
      propertyKey = triple.property.value.toString();
      if (grouped[subjectKey] === undefined) {
        grouped[subjectKey] = {};
      }
      bySubject = grouped[subjectKey];
      if (bySubject[propertyKey] === undefined) {
        bySubject[propertyKey] = [];
      }
      bySubject[propertyKey].push(triple.object.dump());
    }
    return grouped;
  }

  $.rdf.parsers['application/json'] = {
    parse: $.secureEvalJSON,
    serialize: $.toJSON,
    triples: toTriples,
    dump: dumpTriples
  };

})(jQuery);
83 | 


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/spec/PlayerSpec.js:
--------------------------------------------------------------------------------
 1 | describe("FaqBot", function() {
 2 |   var sentences = [];
 3 |   var answers = [];
 4 | 
 5 |   it("should respond as expected ", function() {
 6 |     expect(query("There is a game engine Unreal Engine")).toEqual("what was that?");
 7 |   });
 8 | 
 9 |   
10 |   sentences[0] = "There is a game engine called Unreal Engine";
11 |   answers[0] = "Unreal Engine is a game engine";
12 |   sentences[1] = "There is a horse called Matilda";
13 |   answers[1] = "Matilda is a horse";
14 |   sentences[2] = "There is a course called ML";
15 |   answers[2] = "ML is a course";
16 | 
17 |   for (var i in sentences){
18 |     it( "should respond to \""+sentences[i] + "\" with --> \"" + answers[i]+ "\"", function() {  
19 |       expect(query(sentences[i])).toEqual(answers[i]);
20 |     });
21 |   }
22 | 
23 | });
24 | 
25 | describe("Player", function() {
26 |   var player;
27 |   var song;
28 |   // Every spec starts from a freshly constructed Player and Song.
29 |   beforeEach(function() {
30 |     player = new Player();
31 |     song = new Song();
32 |   });
33 | 
34 |   it("should be able to play a Song", function() {
35 |     player.play(song);
36 |     expect(player.currentlyPlayingSong).toEqual(song);
37 | 
38 |     // demonstrates use of a custom matcher; toBePlaying is not defined in this spec (presumably registered by SpecHelper.js - confirm)
39 |     expect(player).toBePlaying(song);
40 |   });
41 |   // Nested suite: its beforeEach plays and then pauses the song before each of the specs inside it.
42 |   describe("when song has been paused", function() {
43 |     beforeEach(function() {
44 |       player.play(song);
45 |       player.pause();
46 |     });
47 | 
48 |     it("should indicate that the song is currently paused", function() {
49 |       expect(player.isPlaying).toBeFalsy();
50 | 
51 |       // demonstrates use of 'not' with a custom matcher
52 |       expect(player).not.toBePlaying(song);
53 |     });
54 | 
55 |     it("should be possible to resume", function() {
56 |       player.resume();
57 |       expect(player.isPlaying).toBeTruthy();
58 |       expect(player.currentlyPlayingSong).toEqual(song);
59 |     });
60 |   });
61 | 
62 |   // demonstrates use of spies to intercept and test method calls
63 |   it("tells the current song if the user has made it a favorite", function() {
64 |     spyOn(song, 'persistFavoriteStatus');
65 | 
66 |     player.play(song);
67 |     player.makeFavorite();
68 |     // the spy recorded the call, so its argument can be asserted without running the real method
69 |     expect(song.persistFavoriteStatus).toHaveBeenCalledWith(true);
70 |   });
71 | 
72 |   // demonstrates use of expected exceptions: resume() on a song that is already playing must throw
73 |   describe("#resume", function() {
74 |     it("should throw an exception if song is already playing", function() {
75 |       player.play(song);
76 |       // the call is wrapped in a function so that expect() can invoke it and catch the exception
77 |       expect(function() {
78 |         player.resume();
79 |       }).toThrow("song is already playing");
80 |     });
81 |   });
82 | });


--------------------------------------------------------------------------------
/h1-p/count.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 2 |   "http://www.w3.org/TR/html4/loose.dtd">
 3 | <html>
 4 | <head>
 5 |   <title>Count Stuff</title>
 6 | 
 7 |   <link rel="shortcut icon" type="image/png" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine_favicon.png">
 8 |   <link rel="stylesheet" type="text/css" href="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css">
 9 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.js"></script>
10 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine-html.js"></script>
11 | 
12 |   <!-- include source files here... -->
13 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/src/Player.js"></script>
14 |   <script type="text/javascript" src="../thirdparty/jasmine-standalone-1.3.1/src/Song.js"></script>
15 | 
16 |   <script type="text/javascript" src="../thirdparty/jquery-1.7.1.js"></script>
17 | 
18 |   <script src="viterbi.js"></script>
19 | 
20 |   <div id="output"><br/></div>
21 |   <script type="text/javascript">
22 |     (function() {
23 |       callAjax(function(data){
24 |         var c = count(data);
25 |         word_tags = c.word_tags;
26 |         for(var word in word_tags){
27 |           for(var category in word_tags[word]){
28 |             $("div#output").append(word_tags[word][category]+ 
29 |               ' WORD_TAG '+category+' '+word+'<br/>');
30 |           }
31 |         }
32 |         debugger
33 |         grams = c.grams;
34 |         // feel like we are reaching the limits of what we can do here generically
35 |         // limit of assembling data structures, rather can creating classes?
36 |         // of should be be using functional paradigm
37 |         for(var category in grams['1']){
38 |             $("div#output").append(grams['1'][category]+ 
39 |               ' 1-GRAM '+category+'<br/>');
40 |         }
41 |         for(var category in grams['2']){
42 |           for(var category2 in grams['2'][category]){
43 |             $("div#output").append(grams['2'][category][category2]+ 
44 |               ' 2-GRAM '+category+' '+category2+'<br/>');
45 |           }
46 |         }
47 | 
48 |         for(var category in grams['3']){
49 |           for(var category2 in grams['3'][category]){
50 |             for(var category3 in grams['3'][category][category2]){
51 |               $("div#output").append(grams['3'][category][category2][category3]+ 
52 |                 ' 3-GRAM '+category+' '+category2+' '+category3+'<br/>');
53 |             }
54 |           }
55 |         }
56 | 
57 |         // need to output n-gram counts
58 |       },'gene.train.head2');
59 |     })();
60 |   </script>
61 |  
62 | 
63 | </head>
64 | 
65 | <body>
66 | </body>
67 | </html>
68 | 


--------------------------------------------------------------------------------
/jasmine.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 2 |   "http://www.w3.org/TR/html4/loose.dtd">
 3 | <html>
 4 | <head>
 5 |   <title>Jasmine Spec Runner</title>
 6 | 
 7 |   <link rel="shortcut icon" type="image/png" href="thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine_favicon.png">
 8 |   <link rel="stylesheet" type="text/css" href="thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css">
 9 |   <script type="text/javascript" src="thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.js"></script>
10 |   <script type="text/javascript" src="thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine-html.js"></script>
11 | 
12 |   <!-- include source files here... -->
13 |   <script type="text/javascript" src="thirdparty/jasmine-standalone-1.3.1/src/Player.js"></script>
14 |   <script type="text/javascript" src="thirdparty/jasmine-standalone-1.3.1/src/Song.js"></script>
15 | 
16 |   <script type="text/javascript" src="thirdparty/jquery-1.7.1.js"></script>
17 |   <script type="text/javascript" src="thirdparty/jquery.uri.js"></script>
18 |   <script type="text/javascript" src="thirdparty/jquery.xmlns.js"></script>
19 |   <script type="text/javascript" src="thirdparty/jquery.curie.js"></script>
20 |   <script type="text/javascript" src="thirdparty/jquery.datatype.js"></script>
21 |   <script type="text/javascript" src="thirdparty/jquery.rdf.js"></script>
22 |   <script type="text/javascript" src="thirdparty/jquery.rdfa.js"></script>
23 |   <script type="text/javascript" src="thirdparty/jquery.rules.js"></script>
24 |   <script type="text/javascript" src="thirdparty/jquery.rdf.json.js"></script>
25 |   <script type="text/javascript" src="thirdparty/jquery.rdf.turtle.js"></script>
26 |   <script type="text/javascript" src="thirdparty/jquery.icndb.js"></script>
27 |   <script type="text/javascript" src="thirdparty/xregexp.js"></script>
28 |   <script type="text/javascript" src="thirdparty/natural.js"></script>
29 |   <script type="text/javascript" src="thirdparty/removeStopWords.js"></script> 
30 |   <script src="storage.js"></script>
31 |   <script src="faq.js"></script>
32 |   <script src="query.js"></script>
33 | 
34 |   <!-- include spec files here... -->
35 |   <script type="text/javascript" src="storageSpec.js"></script>
36 |   <script type="text/javascript" src="querySpec.js"></script>
37 | 
38 |   <script type="text/javascript">
39 |     (function() { // Configures the Jasmine environment and runs the specs from window.onload.
40 |       var jasmineEnv = jasmine.getEnv();
41 |       jasmineEnv.updateInterval = 1000;
42 | 
43 |       var htmlReporter = new jasmine.HtmlReporter();
44 |       // Results are reported through the HTML reporter.
45 |       jasmineEnv.addReporter(htmlReporter);
46 |       // Spec filtering is delegated to the reporter's own specFilter.
47 |       jasmineEnv.specFilter = function(spec) {
48 |         return htmlReporter.specFilter(spec);
49 |       };
50 |       // Remember any onload handler that was installed before this script ran...
51 |       var currentWindowOnload = window.onload;
52 |       // ...and chain it: the earlier handler runs first, then the specs start.
53 |       window.onload = function() {
54 |         if (currentWindowOnload) {
55 |           currentWindowOnload();
56 |         }
57 |         execJasmine();
58 |       };
59 |       // Function declaration, so it is hoisted and already available to the handler above.
60 |       function execJasmine() {
61 |         jasmineEnv.execute();
62 |       }
63 | 
64 |     })();
65 |   </script>
66 | 
67 | </head>
68 | 
69 | <body>
70 | </body>
71 | </html>
72 | 


--------------------------------------------------------------------------------
/storageSpec.js:
--------------------------------------------------------------------------------
  1 | describe("Storage", function() {
  2 |   var storage;
  3 | 
  4 |   beforeEach(function() {
  5 |     storage = getStorage(new TransientStorage());
  6 |   });
  7 | 
  8 |   afterEach(function() {
  9 | 
 10 |   });
 11 | 
 12 |   
 13 | 
 14 |   it("should be able to get a non null storage", function() {
 15 |     expect(storage).not.toBeNull();
 16 |     expect(storage).toBeDefined();
 17 | 
 18 |   });
 19 | 
 20 |   it("should be able to get a blank storage", function() {
 21 |     expect(storage).not.toBeNull();
 22 |     expect(storage).toBeDefined();
 23 |     expect(storage.backend instanceof TransientStorage).toBeTruthy();
 24 |     expect(storage.databank).not.toBeNull();
 25 |     expect(storage.databank).toBeDefined();
 26 |   });
 27 | 
 28 |   it("should be able to store properties relations", function() {
 29 |     var object = 'Unreal Engine';
 30 |     var relation = 'website';
 31 |     var name = 'http://unrealengine.com';
 32 |     var real_name = "Unreal_Engine";
 33 |     storage.storeProperty(object, relation, name);
 34 |     var result = storage.queryProperty(object, relation);
 35 |     expect(result.value).toEqual(name);
 36 |   });
 37 | 
 38 | 
 39 |   it("should be able to retrieve all properties", function() {
 40 |     var data = [ { name: 'http://unrealengine.com',
 41 |                    relation: 'website' },
 42 |                  { name: '3D',
 43 |                    relation: 'type' } ];
 44 |     var object = 'Unreal Engine';
 45 |     var real_name = "Unreal_Engine";
 46 |     for (var i in data) {
 47 |         storage.storeProperty(object, data[i].relation, data[i].name);
 48 |     }
 49 | 
 50 |     var result = storage.queryAllProperties(object);
 51 |     expect(result instanceof Array).toBeTruthy();
 52 |     expect(result.length).toEqual(2);
 53 | 
 54 |     for (var i in data) {
 55 |       expect(result).toContain(data[i]);
 56 |     }
 57 |   });
 58 | 
 59 |   it("should be able to query databank for properties and fail properly", function() {
 60 |     // TODO should add something to refresh databank between each test
 61 |     var object = 'flower';
 62 |     var name = 'Bert';  
 63 |     var relation = 'colour';
 64 |     var result = storage.queryProperty(name,relation);
 65 |     expect(result).toEqual(undefined);
 66 |   });
 67 | 
 68 | 
 69 |   it("should be able to store named entities", function() {
 70 |     var object = 'robot';
 71 |     var name = 'Robbie';
 72 |     storage.storeEntity(object, name);
 73 |     var result = storage.queryEntity(name);
 74 |     expect(result.type).toEqual(object);
 75 |   });
 76 | 
 77 |   it("should be able to query databank", function() {
 78 |     var object = 'robot';
 79 |     var name = 'Robbie';  
 80 |     storage.storeEntity(object, name);
 81 |     var result = storage.queryEntity(name);
 82 |     expect(result.type).toEqual(object);
 83 |   })
 84 | 
 85 |   it("should be able to query databank and fail properly", function() {
 86 |     // TODO should add something to refresh databank between each test
 87 |     var object = 'flower';
 88 |     var name = 'Bert';  
 89 |     var result = storage.queryEntity(name);
 90 |     expect(result).toEqual(undefined);
 91 |   });
 92 | 
 93 |   it("should persist the transcript", function(){
 94 |      fail();
 95 |   });
 96 | 
 97 | 
 98 |   it("can be cleared", function() {
 99 |     var storage = getStorage(new TransientStorage());
100 |     expect(storage).not.toBeNull();
101 |     expect(storage).toBeDefined();
102 |     var object = 'robot';
103 |     var name = 'Robbie';
104 |     storage.storeEntity(object, name);
105 |     var result = storage.queryEntity(name);
106 |     expect(result.type).toEqual(object);
107 |     storage.clearDatabank();
108 |     var result = storage.queryEntity(name);
109 |     expect(result).toBeUndefined();
110 | 
111 |   });
112 | 
113 | 
114 | });
115 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | faqbot
 2 | ======
 3 | 
 4 | JavaScript chat bot to answer frequently asked questions (faq)
 5 | 
 6 | Preview the bot here: [http://htmlpreview.github.com/?https://github.com/tansaku/faqbot/blob/master/faq.html](http://htmlpreview.github.com/?https://github.com/tansaku/faqbot/blob/master/faq.html)
 7 | 
 8 | ----
 9 | 
10 | Most Natural Language Processing (NLP) approaches seem to be focused on how to parse sentences - not on how to construct sentences that match some model of the world (notable exception is SHRDLU)
11 | 
12 | Most chatbots seem designed either to draw on a fixed database of material (AIML) or to use regular expressions (Eliza), although some do learn and remember word occurrence probabilities (MegaHAL).
13 | 
14 | There appears to be an interesting opportunity for a chatbot that made additional use of NLP techniques and a good persistence framework in order to be a repository for knowledge and provide support in online text chat environments.
15 | 
16 | This project is aiming to create a chatbot that can have a knowledge base updated by individual users through a process of discussion, e.g.  
17 | 
18 | Human: There is a game engine Unity3D  
19 | Bot: OK  
20 | Human: What is Unity3D?  
21 | Bot: Unity3D is a game engine  
22 | Human: Unity3D has a URL of http://www.studica.com/unity  
23 | Bot: OK  
24 | Human: What is the URL for Unity3D?  
25 | Bot: The URL for Unity3D is http://www.studica.com/unity
26 | 
27 | Ideally all conversations will be stored in the cloud, but not rely on having to maintain a specific server.  In the first instance Github Gists seem like a good idea.  An authenticated Github user could have gists created associated with their own user account.  Although ideally the chatbot won't be tied to a particular persistence framework.
28 | 
29 | This kind of system could have great application for online classes.  It would be great if the faqbot's knowledge base could contain all the data related to an online class, including individual assignment statuses, thus allowing interactions like this:
30 | 
31 | Human: What's my next assignment?  
32 | Bot: It's assignment 4 on prototyping a mobile interface [link]
33 | 
34 | This assumes the user has authenticated against something like a Google or Facebook login.
35 | 
36 | Hubot and Skype are interesting related areas.  
37 | 
38 | We're trying to understand how hubot persists data between conversations (https://github.com/github/hubot/issues/373#issuecomment-11992999); hubot looks promising inasmuch as it already has a Skype plugin.  Skype is interesting because many online classes use its text chat for communication, but it seems that a Skype bot must run as a client on someone's computer.
39 | 
40 | In the first instance we've decided to go with client side javascript since it can run in HTML pages, and give the most open possible access to the faqbot.  Ideally we'd like to see simple HTML pages with the chatbot interface where authenticated users can have conversations like this:
41 | 
42 | Human: There is a game engine Unreal Engine  
43 | Bot: OK  
44 | Human: What is Unreal Engine?  
45 | Bot: Unreal Engine is a game  
46 | Human: No, Unreal Engine is a game engine  
47 | Bot: OK  
48 | Human: What is Unreal Engine?  
49 | Bot: Unreal Engine is a game engine
50 | 
51 | where effectively the human is providing a specification of how the bot should be responding to a question.  This specification should then be added automatically to the set of unit tests for the bot, so that it can be checked in future against subsequent changes in specification.
52 | 
53 | Open Issues:
54 | 
55 | 1) need support for named entity recognition in NaturalJS
56 | 2) need to fix on a first HTML interface
57 | 3) would like to provide predictive text look up to help scaffold users chatting with bot
58 | 
59 | Background
60 | 
61 | The current design of this system arises from an earlier attempt to implement the same approach in Python with the NLTK.  This worked fine, except that it was not straightforward to install support for the NLTK on a cloud hosting service like Heroku.  Also it seemed that members of the project were unlikely to download and hack on a Python project that meant getting set up with NLTK locally.  It seemed that if we made available a pure HTML interface that didn't rely on having particular software libraries installed on a server, and that chatting to the bot would mirror the process of generating unit tests for its operation, then it might be easier to get more people involved.  Ultimately the project can use any technology, but it makes sense to get some serious prototyping done by making simple versions of the system as accessible as possible.
62 | 
63 | Running with chrome
64 | 
65 | If you run the faqbot in Chrome directly from the local filesystem, Chrome needs to be launched with the `--allow-file-access-from-files` flag.
66 | 


--------------------------------------------------------------------------------
/thirdparty/qunit-1.10.0.css:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * QUnit v1.10.0 - A JavaScript Unit Testing Framework
  3 |  *
  4 |  * http://qunitjs.com
  5 |  *
  6 |  * Copyright 2012 jQuery Foundation and other contributors
  7 |  * Released under the MIT license.
  8 |  * http://jquery.org/license
  9 |  */
 10 | 
 11 | /** Font Family and Sizes */
 12 | 
 13 | #qunit-tests, #qunit-header, #qunit-banner, #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult {
 14 | 	font-family: "Helvetica Neue Light", "HelveticaNeue-Light", "Helvetica Neue", Calibri, Helvetica, Arial, sans-serif;
 15 | }
 16 | 
 17 | #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult, #qunit-tests li { font-size: small; }
 18 | #qunit-tests { font-size: smaller; }
 19 | 
 20 | 
 21 | /** Resets */
 22 | 
 23 | #qunit-tests, #qunit-tests ol, #qunit-header, #qunit-banner, #qunit-userAgent, #qunit-testresult, #qunit-modulefilter {
 24 | 	margin: 0;
 25 | 	padding: 0;
 26 | }
 27 | 
 28 | 
 29 | /** Header */
 30 | 
 31 | #qunit-header {
 32 | 	padding: 0.5em 0 0.5em 1em;
 33 | 
 34 | 	color: #8699a4;
 35 | 	background-color: #0d3349;
 36 | 
 37 | 	font-size: 1.5em;
 38 | 	line-height: 1em;
 39 | 	font-weight: normal;
 40 | 
 41 | 	border-radius: 5px 5px 0 0;
 42 | 	-moz-border-radius: 5px 5px 0 0;
 43 | 	-webkit-border-top-right-radius: 5px;
 44 | 	-webkit-border-top-left-radius: 5px;
 45 | }
 46 | 
 47 | #qunit-header a {
 48 | 	text-decoration: none;
 49 | 	color: #c2ccd1;
 50 | }
 51 | 
 52 | #qunit-header a:hover,
 53 | #qunit-header a:focus {
 54 | 	color: #fff;
 55 | }
 56 | 
 57 | #qunit-testrunner-toolbar label {
 58 | 	display: inline-block;
 59 | 	padding: 0 .5em 0 .1em;
 60 | }
 61 | 
 62 | #qunit-banner {
 63 | 	height: 5px;
 64 | }
 65 | 
 66 | #qunit-testrunner-toolbar {
 67 | 	padding: 0.5em 0 0.5em 2em;
 68 | 	color: #5E740B;
 69 | 	background-color: #eee;
 70 | 	overflow: hidden;
 71 | }
 72 | 
 73 | #qunit-userAgent {
 74 | 	padding: 0.5em 0 0.5em 2.5em;
 75 | 	background-color: #2b81af;
 76 | 	color: #fff;
 77 | 	text-shadow: rgba(0, 0, 0, 0.5) 2px 2px 1px;
 78 | }
 79 | 
 80 | #qunit-modulefilter-container {
 81 | 	float: right;
 82 | }
 83 | 
 84 | /** Tests: Pass/Fail */
 85 | 
 86 | #qunit-tests {
 87 | 	list-style-position: inside;
 88 | }
 89 | 
 90 | #qunit-tests li {
 91 | 	padding: 0.4em 0.5em 0.4em 2.5em;
 92 | 	border-bottom: 1px solid #fff;
 93 | 	list-style-position: inside;
 94 | }
 95 | 
 96 | #qunit-tests.hidepass li.pass, #qunit-tests.hidepass li.running  {
 97 | 	display: none;
 98 | }
 99 | 
100 | #qunit-tests li strong {
101 | 	cursor: pointer;
102 | }
103 | 
104 | #qunit-tests li a {
105 | 	padding: 0.5em;
106 | 	color: #c2ccd1;
107 | 	text-decoration: none;
108 | }
109 | #qunit-tests li a:hover,
110 | #qunit-tests li a:focus {
111 | 	color: #000;
112 | }
113 | 
114 | #qunit-tests ol {
115 | 	margin-top: 0.5em;
116 | 	padding: 0.5em;
117 | 
118 | 	background-color: #fff;
119 | 
120 | 	border-radius: 5px;
121 | 	-moz-border-radius: 5px;
122 | 	-webkit-border-radius: 5px;
123 | }
124 | 
125 | #qunit-tests table {
126 | 	border-collapse: collapse;
127 | 	margin-top: .2em;
128 | }
129 | 
130 | #qunit-tests th {
131 | 	text-align: right;
132 | 	vertical-align: top;
133 | 	padding: 0 .5em 0 0;
134 | }
135 | 
136 | #qunit-tests td {
137 | 	vertical-align: top;
138 | }
139 | 
140 | #qunit-tests pre {
141 | 	margin: 0;
142 | 	white-space: pre-wrap;
143 | 	word-wrap: break-word;
144 | }
145 | 
146 | #qunit-tests del {
147 | 	background-color: #e0f2be;
148 | 	color: #374e0c;
149 | 	text-decoration: none;
150 | }
151 | 
152 | #qunit-tests ins {
153 | 	background-color: #ffcaca;
154 | 	color: #500;
155 | 	text-decoration: none;
156 | }
157 | 
158 | /*** Test Counts */
159 | 
160 | #qunit-tests b.counts                       { color: black; }
161 | #qunit-tests b.passed                       { color: #5E740B; }
162 | #qunit-tests b.failed                       { color: #710909; }
163 | 
164 | #qunit-tests li li {
165 | 	padding: 5px;
166 | 	background-color: #fff;
167 | 	border-bottom: none;
168 | 	list-style-position: inside;
169 | }
170 | 
171 | /*** Passing Styles */
172 | 
173 | #qunit-tests li li.pass {
174 | 	color: #3c510c;
175 | 	background-color: #fff;
176 | 	border-left: 10px solid #C6E746;
177 | }
178 | 
179 | #qunit-tests .pass                          { color: #528CE0; background-color: #D2E0E6; }
180 | #qunit-tests .pass .test-name               { color: #366097; }
181 | 
182 | #qunit-tests .pass .test-actual,
183 | #qunit-tests .pass .test-expected           { color: #999999; }
184 | 
185 | #qunit-banner.qunit-pass                    { background-color: #C6E746; }
186 | 
187 | /*** Failing Styles */
188 | 
189 | #qunit-tests li li.fail {
190 | 	color: #710909;
191 | 	background-color: #fff;
192 | 	border-left: 10px solid #EE5757;
193 | 	white-space: pre;
194 | }
195 | 
196 | #qunit-tests > li:last-child {
197 | 	border-radius: 0 0 5px 5px;
198 | 	-moz-border-radius: 0 0 5px 5px;
199 | 	-webkit-border-bottom-right-radius: 5px;
200 | 	-webkit-border-bottom-left-radius: 5px;
201 | }
202 | 
203 | #qunit-tests .fail                          { color: #000000; background-color: #EE5757; }
204 | #qunit-tests .fail .test-name,
205 | #qunit-tests .fail .module-name             { color: #000000; }
206 | 
207 | #qunit-tests .fail .test-actual             { color: #EE5757; }
208 | #qunit-tests .fail .test-expected           { color: green;   }
209 | 
210 | #qunit-banner.qunit-fail                    { background-color: #EE5757; }
211 | 
212 | 
213 | /** Result */
214 | 
215 | #qunit-testresult {
216 | 	padding: 0.5em 0.5em 0.5em 2.5em;
217 | 
218 | 	color: #2b81af;
219 | 	background-color: #D2E0E6;
220 | 
221 | 	border-bottom: 1px solid white;
222 | }
223 | #qunit-testresult .module-name {
224 | 	font-weight: bold;
225 | }
226 | 
227 | /** Fixture */
228 | 
229 | #qunit-fixture {
230 | 	position: absolute;
231 | 	top: -10000px;
232 | 	left: -10000px;
233 | 	width: 1000px;
234 | 	height: 1000px;
235 | }
236 | 


--------------------------------------------------------------------------------
/h1-p/submit.py:
--------------------------------------------------------------------------------
  1 | ### The only things you'll have to edit (unless you're porting this script over to a different language) 
  2 | ### are at the bottom of this file.
  3 | 
  4 | import urllib
  5 | import urllib2
  6 | import hashlib
  7 | import random
  8 | import email
  9 | import email.message
 10 | import email.encoders
 11 | import StringIO
 12 | import sys
 13 | 
 14 | """"""""""""""""""""
 15 | """"""""""""""""""""
 16 | 
 17 | class NullDevice:  # file-like sink: write() accepts a string and discards it
 18 |   def write(self, s):
 19 |     pass  # intentionally does nothing with s
 20 | 
 21 | def submit():   
 22 |   print '==\n== [sandbox] Submitting Solutions \n=='
 23 |   # Interactive flow: credentials -> part choice -> challenge -> submission.
 24 |   (login, password) = loginPrompt()
 25 |   if not login:
 26 |     print '!! Submission Cancelled'
 27 |     return
 28 |   
 29 |   print '\n== Connecting to Coursera ... '
 30 | 
 31 |   # Part Identifier
 32 |   (partIdx, sid) = partPrompt()
 33 | 
 34 |   # Get Challenge
 35 |   (login, ch, state, ch_aux) = getChallenge(login, sid) #sid is the "part identifier"
 36 |   if((not login) or (not ch) or (not state)):
 37 |     # Some error occurred; the error string is in the first returned element.
 38 |     print '\n!! Error: %s\n' % login
 39 |     return
 40 | 
 41 |   # Attempt Submission with Challenge
 42 |   ch_resp = challengeResponse(login, password, ch)
 43 |   (result, string) = submitSolution(login, ch_resp, sid, output(partIdx), \
 44 |                                   source(partIdx), state, ch_aux)
 45 |   # Only the server's reply text is shown; result is not inspected.
 46 |   print '== %s' % string.strip()
 47 | 
 48 | 
 49 | # =========================== LOGIN HELPERS - NO NEED TO CONFIGURE THIS =======================================
 50 | 
 51 | def loginPrompt():
 52 |   """Prompt the user for login credentials. Returns a tuple (login, password)."""
 53 |   (login, password) = basicPrompt()
 54 |   return login, password
 55 | 
 56 | 
 57 | def basicPrompt():
 58 |   """Prompt the user for login credentials. Returns a tuple (login, password)."""
 59 |   login = raw_input('Login (Email address): ')
 60 |   password = raw_input('One-time Password (from the assignment page. This is NOT your own account\'s password): ')
 61 |   return login, password
 62 | 
 63 | def partPrompt():
 64 |   print 'Hello! These are the assignment parts that you can submit:'
 65 |   counter = 0
 66 |   for part in partFriendlyNames:
 67 |     counter += 1
 68 |     print str(counter) + ') ' + partFriendlyNames[counter - 1]
 69 |   partIdx = int(raw_input('Please enter which part you want to submit (1-' + str(counter) + '): ')) - 1
 70 |   return (partIdx, partIds[partIdx])
 71 | 
 72 | def getChallenge(email, sid):
 73 |   """Gets the challenge salt from the server. Returns (email,ch,state,ch_aux)."""
 74 |   url = challenge_url()
 75 |   values = {'email_address' : email, 'assignment_part_sid' : sid, 'response_encoding' : 'delim'}
 76 |   data = urllib.urlencode(values)
 77 |   req = urllib2.Request(url, data)
 78 |   response = urllib2.urlopen(req)
 79 |   text = response.read().strip()
 80 | 
 81 |   # text is of the form email|ch|signature
 82 |   splits = text.split('|')
 83 |   if(len(splits) != 9):
 84 |     print 'Badly formatted challenge response: %s' % text
 85 |     return None
 86 |   return (splits[2], splits[4], splits[6], splits[8])
 87 | 
 88 | def challengeResponse(email, passwd, challenge):
 89 |   sha1 = hashlib.sha1()
 90 |   sha1.update("".join([challenge, passwd])) # hash the first elements
 91 |   digest = sha1.hexdigest()
 92 |   strAnswer = ''
 93 |   for i in range(0, len(digest)):
 94 |     strAnswer = strAnswer + digest[i]
 95 |   return strAnswer 
 96 |   
 97 | def challenge_url():
 98 |   """Returns the challenge url."""
 99 |   return "https://class.coursera.org/" + URL + "/assignment/challenge"
100 | 
101 | def submit_url():
102 |   """Returns the submission url."""
103 |   return "https://class.coursera.org/" + URL + "/assignment/submit"
104 | 
105 | def submitSolution(email_address, ch_resp, sid, output, source, state, ch_aux):
106 |   """Submits a solution to the server. Returns (result, string): result is always 0, string is the stripped response body. ch_aux is accepted but not sent."""
107 |   source_64_msg = email.message.Message()
108 |   source_64_msg.set_payload(source)
109 |   email.encoders.encode_base64(source_64_msg)
110 |   # Both payloads are wrapped in email Messages only so that they can be base64-encoded.
111 |   output_64_msg = email.message.Message()
112 |   output_64_msg.set_payload(output)
113 |   email.encoders.encode_base64(output_64_msg)
114 |   values = { 'assignment_part_sid' : sid, \
115 |              'email_address' : email_address, \
116 |              # the raw output is not sent; the base64 payload is
117 |              'submission' : output_64_msg.get_payload(), \
118 |              # likewise for the source
119 |              'submission_aux' : source_64_msg.get_payload(), \
120 |              'challenge_response' : ch_resp, \
121 |              'state' : state \
122 |            }
123 |   url = submit_url()  
124 |   data = urllib.urlencode(values)
125 |   req = urllib2.Request(url, data)
126 |   response = urllib2.urlopen(req)
127 |   string = response.read().strip()
128 |   result = 0
129 |   return result, string
130 | 
131 | ## Placeholder for collecting the source code (just for logging purposes); no file is read.
132 | def source(partIdx):
133 |   # partIdx is ignored: an empty source string is always returned
134 |   return ""
135 | 
136 | 
137 | 
138 | ############ BEGIN ASSIGNMENT SPECIFIC CODE - YOU'LL HAVE TO EDIT THIS ##############
139 | 
140 | # Make sure you change this string to the last segment of your class URL.
141 | # For example, if your URL is https://class.coursera.org/pgm-2012-001-staging, set it to "pgm-2012-001-staging".
142 | URL = 'nlangp-001'
143 | 
144 | # the "Identifier" you used when creating the part
145 | partIds = ['hmm-part1', 'hmm-part2', 'hmm-part3']                        
146 | # used to generate readable run-time information for students
147 | partFriendlyNames = ['Unigram Tagger', 'Trigram Tagger', 'Extended Tagger'] 
148 |           
149 | def output(partIdx):
150 |   try:
151 |     return open("gene_test.p%d.out"%(partIdx + 1)).read()
152 |   except:
153 |     print "File gene_test.p%d.out not found"%(partIdx + 1) 
154 |     exit()
155 | 
156 | submit()
157 | 


--------------------------------------------------------------------------------
/querySpec.js:
--------------------------------------------------------------------------------
  1 | describe("FaqBot", function() {
  2 |   var sentences = [];
  3 |   var answers = [];
  4 |   var storage = null;
  5 | 
  6 |   beforeEach(function() {
  7 |     storage = getStorage(new TransientStorage());
  8 |     initStorage(storage);
  9 |   });
 10 | 
 11 |   afterEach(function() {
 12 |     storage.clearDatabank();
 13 |   });
 14 | 
 15 |   it("should respond as expected ", function() {
 16 |     expect(query(storage,"There is a game engine Unreal Engine")).toEqual("why?");
 17 |   });
 18 | 
 19 |   // TODO ideally all this data would be in starting knowledge base for bot as well?
 20 |   // would be nice if we had this in some separate file perhaps? fixture? !!
 21 |   sentences.push("There is a game engine called Unreal Engine");
 22 |   answers.push("Unreal Engine is a game engine");
 23 |   // should be pushing some expected knowledge structure on here
 24 |   sentences.push("There is a horse called Matilda");
 25 |   answers.push("Matilda is a horse");
 26 |   sentences.push("There is a course called ML");
 27 |   answers.push("ML is a course");
 28 |   //sentences.push("Gandalf is a wizard"); // will require new regex - next step extract existing one
 29 |   //answers.push("Gandalf is a wizard");
 30 |   sentences.push("Unreal Engine has a website http://unrealengine.com");
 31 |   answers.push("The website for Unreal Engine is http://unrealengine.com");
 32 | 
 33 | /*
 34 |   sentences.push("There is a game engine called Unity3D");
 35 |   answers.push("Unity3D is a game engine");
 36 |   sentences.push("Unity3D has a URL of http://www.studica.com/unity");
 37 |   answers.push("The URL for Unreal Engine is http://www.studica.com/unity");
 38 |   sentences.push("Unity3D has a type of integrated");
 39 |   answers.push("The type for Unreal Engine is integrated");
 40 |   sentences.push("Unity3D has a typeof 3D");
 41 |   answers.push("The type for Unity3D is 3D");
 42 |   sentences.push("What type of game engine is Unity3D?");
 43 |   answers.push("The type for Unity3D is '3D'")
 44 | 
 45 |   sentences.push("There is a game engine Crysis");
 46 |   answers.push("Crysis is a game engine");
 47 | 
 48 |   sentences.push("There is a game engine Source");
 49 |   answers.push("Source is a game engine");
 50 |   sentences.push("Source has a URL of http://source.valvesoftware.com/sourcesdk/sourceu.php");
 51 |   answers.push("The URL for Source is http://source.valvesoftware.com/sourcesdk/sourceu.php");
 52 | */
 53 | 
 54 |   var checkAnswer = function(i){
 55 |     it( "should respond to \""+sentences[i] + "\" with --> \"" + answers[i]+ "\"", function() { 
 56 | 
 57 |         expect(query(storage,sentences[i])).toEqual(answers[i]);
 58 |         // ideally we should be checking that data is stored in knowledge base ...
 59 |         // and dumping the knowledge base on each test iteration here ...
 60 |       });
 61 |   }
 62 | 
 63 |   for (var i in sentences){
 64 |     checkAnswer(i);
 65 |   }
 66 |   
 67 |   it("should match entity assertion regex", function() {
 68 |     // websites have URLs
 69 |     var result = matchEntityAssertionRegex("There is a robot called Robbie");
 70 |     expect(result).toNotEqual(null);
 71 |     expect(result).toNotEqual(undefined);
 72 |     expect(result.object).toEqual("robot");
 73 |     expect(result.name).toEqual("Robbie");
 74 |   });
 75 | 
 76 |   it("should match properties regex", function() {
 77 |     // websites have URLs
 78 |     var result = matchPropertiesRegex("Unreal Engine has a website http://unrealengine.com");
 79 |     expect(result).toNotEqual(null);
 80 |     expect(result).toNotEqual(undefined);
 81 |     expect(result.object).toEqual("Unreal Engine");
 82 |     expect(result.relation).toEqual("website");
 83 |     expect(result.name).toEqual("http://unrealengine.com");
 84 |   });
 85 | 
 86 |   it("should remove punctuation", function() {
 87 |     expect(removePunctuation("Hello. How are you?")).toEqual("Hello How are you");
 88 |   });
 89 | 
 90 |   it("should query against a specific storage", function(){
 91 |     expect(query(storage,"There is a game engine called Unreal Engine")).toEqual("Unreal Engine is a game engine");
 92 |   });
 93 | 
 94 |   it("should respond from database when asked about a one word item", function() {
 95 |     expect(query(storage,"There is a course called ML")).toEqual("ML is a course");
 96 |     expect(query(storage,"What do you know about ML")).toEqual("I know that ML is a course");
 97 |     expect(query(storage,"What do you know about ML?")).toEqual("I know that ML is a course");
 98 |   });
 99 | 
100 |   it("should respond from database when asked about a two word item", function() {
101 |     expect(query(storage,"There is a game engine called Unreal Engine")).toEqual("Unreal Engine is a game engine");
102 |     expect(query(storage,"Unreal Engine has a website http://unrealengine.com")).toEqual("The website for Unreal Engine is http://unrealengine.com");
103 |     expect(query(storage,"What do you know about Unreal Engine")).toEqual("I know that Unreal Engine is a game engine and website for Unreal Engine is http://unrealengine.com");
104 |     expect(query(storage,"What do you know about Unreal Engine?")).toEqual("I know that Unreal Engine is a game engine and website for Unreal Engine is http://unrealengine.com");
105 |   });
106 | 
107 |   it("should be able to handle question based on passed in storage", function() {
108 |     expect(query(storage,"There is a course called ML")).toEqual("ML is a course");
109 |     expect(handleQuestion(storage,"What do you know about ML")).toEqual("I know that ML is a course");
110 |   });
111 | 
112 |   it("it should be able to handle queries about a single properties", function() {
113 |     expect(query(storage,"There is a game engine called Unreal Engine")).toEqual("Unreal Engine is a game engine");
114 |     expect(query(storage,"Unreal Engine has a website http://unrealengine.com")).toEqual("The website for Unreal Engine is http://unrealengine.com");
115 |     expect(query(storage,"what is the website of Unreal Engine?")).toEqual("The website for Unreal Engine is http://unrealengine.com");
116 |   });
117 | 
118 |   // not sure how/if we can have pending specs
119 | 
120 | });
121 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.icndb.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *	Version: 0.1
  3 |  */
  4 | (function($) {
  5 | 	$.icndb = {};
  6 | 	$.icndb.client = {}
  7 | 	$.icndb.client.id = 4;
  8 | 	$.icndb.client.version = 0.1;
  9 | 
 10 | 	var base = "http://api.icndb.com/";
 11 | 
 12 | 	/**
 13 | 	 *	Returns the full URL of the given resource.
 14 | 	 *	Ex.: 'jokes/random/5' -> http://api.icndb.com/jokes/random/5?client=4&clientVersion=0.1
 15 | 	 *
 16 | 	 *	@param	resource	The relative path of hte resource, NO LEADING '/'
 17 | 	 */
 18 | 	var full = function(resource) {
 19 | 		return base + resource + '?client=' + $.icndb.client.id + '&clientVersion=' + $.icndb.client.version;
 20 | 	};
 21 | 
 22 | 	/**
 23 | 	 *	Calls the URL, evaluated the JSON returned and returns the result value as JS object on success.
 24 | 	 *	No exceptions here, this always uses script communication.
 25 | 	 *
 26 | 	 *	@param	destination			Location of the destination (String URL)
 27 | 	 *	@param	successCB(result)	Callback on success. Will be called with result value as JS object.
 28 | 	 *								result = {"type": <type:String>, "value": <value:Object>}
 29 | 	 */
 30 | 	var callServer = function(destination, successCB, errorCB) {
 31 | 		$.ajax({
 32 | 			url: destination,
 33 | 			dataType: "jsonp",
 34 | 			type: "GET",
 35 | 			success: function(result) {
 36 | 				successCB(result);
 37 | 			}
 38 | 		});
 39 | 	}
 40 | 
 41 | 	/************************************************************************
 42 | 	 *	Simple API
 43 | 	 ************************************************************************/
 44 | 
 45 | 	/**
 46 | 	 *	Returns multiple random Chuck Norris jokes to the callback function, optionally with given first name and last name.
 47 | 	 *	There can be no error when retrieving random jokes.
 48 | 	 *	
 49 | 	 *	@param	success(jokes: [{id: <id:integer>, joke: <joke:String>}])
 50 | 	 *	
 51 | 	 *	OR
 52 | 	 *	
 53 | 	 *	@param	{
 54 | 	 *		success: function(jokes: [{id: <id:integer>, joke: <joke:String>}])
 55 | 	 *		number		[optional]	The number of jokes to retrieve. If not given, 1 joke is retrieved.
 56 | 	 *		firstName	[optional] 	The first name of the main character in the joke.
 57 | 	 *		lastName	[optional] 	The last name of the main character in the joke.
 58 | 	 *		limitTo		[optional] 	An array of categories (Strings) to which the joke may belong.
 59 | 	 *		exclude	[optional, only processed if limitTo not given]	An array of categories (Strings) to which the joke may not belong.
 60 | 	 *	}
 61 | 	 */
 62 | 	$.icndb.getRandomJokes = function(args) {
 63 | 		var success = function(result) {
 64 | 			// notice: never NoSuchJokeException with random jokes
 65 | 			if(args.success) {
 66 | 				args.success(result.value);
 67 | 			} else {
 68 | 				args(result.value);
 69 | 			}
 70 | 		}
 71 | 		var number = 1;
 72 | 		if(args.number) { 
 73 | 			number = args.number; 
 74 | 		}
 75 | 		var url = full("jokes/random/" + number);
 76 | 		if(args.firstName) {
 77 | 			url += "&firstName=" + args.firstName;
 78 | 		}
 79 | 		if(args.lastName) {
 80 | 			url += "&lastName=" + args.lastName;
 81 | 		}
 82 | 		if(args.limitTo) {
 83 | 			url += "&limitTo=[" + args.limitTo.toString() + "]";
 84 | 		} else if(args.exclude) {
 85 | 			url += "&exclude=[" + args.exclude.toString() + "]";
 86 | 		}
 87 | 		callServer(url, success, function() {} ); 
 88 | 	};
 89 | 
 90 | 	/**
 91 | 	 *	Returns a random Chuck Norris joke to the callback function, optionally with given first name and last name.
 92 | 	 *	There can be no error when retrieving a random joke.
 93 | 	 *	
 94 | 	 *	@param	success(joke: {id: <id:integer>, joke: <joke:String>})
 95 | 	 *	
 96 | 	 *	OR
 97 | 	 *	
 98 | 	 *	@param	{
 99 | 	 *		success: function(joke: {id: <id:integer>, joke: <joke:String>})
100 | 	 *		firstName	[optional] The first name of the main character in the joke.
101 | 	 *		lastName	[optional] The last name of the main character in the joke.
102 | 	 *		limitTo		[optional] An array of categories (Strings) to which the joke may belong.
103 | 	 *		exclude	[optional, only processed if limitTo not given]	An array of categories (Strings) to which the joke may not belong.
104 | 	 *	}
105 | 	 */
106 | 	$.icndb.getRandomJoke = function(args) {
107 | 		var args2 = {};
108 | 		$.extend(args2, args);
109 | 		args2.success = function(result) {
110 | 			// notice: never NoSuchJokeException with random jokes
111 | 			result = result[0];
112 | 			if(args.success) {
113 | 				args.success(result);
114 | 			} else {
115 | 				args(result);
116 | 			}
117 | 		};
118 | 		args2.number = 1;
119 | 		$.icndb.getRandomJokes(args2);
120 | 	};
121 | 
122 | 	/**
123 | 	 *	Returns all the jokes in the database.
124 | 	 *	
125 | 	 *	@param	success: function(jokes: [{id: <id:integer>, joke: <joke:String>}])
126 | 	 *	
127 | 	 *	OR
128 | 	 *	
129 | 	 *	@param	{
130 | 	 *		success: function(jokes: [{id: <id:integer>, joke: <joke:String>}])
131 | 	 *		firstName	[optional] The first name of the main character in the joke.
132 | 	 *		lastName	[optional] The last name of the main character in the joke.
133 | 	 *		limitTo		[optional] An array of categories (Strings) to which the joke may belong.
134 | 	 *		exclude	[optional, only processed if limitTo not given]	An array of categories (Strings) to which the joke may not belong.
135 | 	 */
136 | 	$.icndb.getJokes = function(args) {		
137 | 		var success = function(result) {
138 | 			// notice: never NoSuchJokeException when retrieving all jokes
139 | 			if(args.success) {
140 | 				args.success(result.value);
141 | 			} else {
142 | 				args(result.value);
143 | 			}
144 | 		}
145 | 		var url = full("jokes");
146 | 		if(args.firstName) {
147 | 			url += "&firstName=" + args.firstName;
148 | 		}
149 | 		if(args.lastName) {
150 | 			url += "&lastName=" + args.lastName;
151 | 		}
152 | 		if(args.limitTo) {
153 | 			url += "&limitTo=[" + args.limitTo.toString() + "]";
154 | 		} else if(args.exclude) {
155 | 			url += "&exclude=[" + args.exclude.toString() + "]";
156 | 		}
157 | 		callServer(url, success, function() {} ); 
158 | 	};
159 | 
160 | 	/**
161 | 	 *	Returns the categories in the systems as an array of strings.
162 | 	 *	
163 | 	 *	@param	callback:function(categories:[String])
164 | 	 */
165 | 	$.icndb.getCategories = function(callback) {
166 | 		var success = function(result) {
167 | 			callback(result.value);
168 | 		};
169 | 		var url = full("categories");
170 | 		callServer(url, success, function() {} ); 
171 | 	}
172 | 
173 | 	/**
174 | 	 *	Returns the number of jokes in the database.
175 | 	 *	
176 | 	 *	@param	callback:function(categories:[integer])
177 | 	 */
178 | 	$.icndb.getNumberOfJokes = function(callback) {
179 | 		var success = function(result) {
180 | 			callback(result.value);
181 | 		};
182 | 		var url = full("jokes/count");
183 | 		callServer(url, success, function() {} ); 
184 | 	}
185 | })(jQuery);
186 | 


--------------------------------------------------------------------------------
/thirdparty/jasmine-standalone-1.3.1/lib/jasmine-1.3.1/jasmine.css:
--------------------------------------------------------------------------------
 1 | body { background-color: #eeeeee; padding: 0; margin: 5px; overflow-y: scroll; }
 2 | 
 3 | #HTMLReporter { font-size: 11px; font-family: Monaco, "Lucida Console", monospace; line-height: 14px; color: #333333; }
 4 | #HTMLReporter a { text-decoration: none; }
 5 | #HTMLReporter a:hover { text-decoration: underline; }
 6 | #HTMLReporter p, #HTMLReporter h1, #HTMLReporter h2, #HTMLReporter h3, #HTMLReporter h4, #HTMLReporter h5, #HTMLReporter h6 { margin: 0; line-height: 14px; }
 7 | #HTMLReporter .banner, #HTMLReporter .symbolSummary, #HTMLReporter .summary, #HTMLReporter .resultMessage, #HTMLReporter .specDetail .description, #HTMLReporter .alert .bar, #HTMLReporter .stackTrace { padding-left: 9px; padding-right: 9px; }
 8 | #HTMLReporter #jasmine_content { position: fixed; right: 100%; }
 9 | #HTMLReporter .version { color: #aaaaaa; }
10 | #HTMLReporter .banner { margin-top: 14px; }
11 | #HTMLReporter .duration { color: #aaaaaa; float: right; }
12 | #HTMLReporter .symbolSummary { overflow: hidden; *zoom: 1; margin: 14px 0; }
13 | #HTMLReporter .symbolSummary li { display: block; float: left; height: 7px; width: 14px; margin-bottom: 7px; font-size: 16px; }
14 | #HTMLReporter .symbolSummary li.passed { font-size: 14px; }
15 | #HTMLReporter .symbolSummary li.passed:before { color: #5e7d00; content: "\02022"; }
16 | #HTMLReporter .symbolSummary li.failed { line-height: 9px; }
17 | #HTMLReporter .symbolSummary li.failed:before { color: #b03911; content: "x"; font-weight: bold; margin-left: -1px; }
18 | #HTMLReporter .symbolSummary li.skipped { font-size: 14px; }
19 | #HTMLReporter .symbolSummary li.skipped:before { color: #bababa; content: "\02022"; }
20 | #HTMLReporter .symbolSummary li.pending { line-height: 11px; }
21 | #HTMLReporter .symbolSummary li.pending:before { color: #aaaaaa; content: "-"; }
22 | #HTMLReporter .exceptions { color: #fff; float: right; margin-top: 5px; margin-right: 5px; }
23 | #HTMLReporter .bar { line-height: 28px; font-size: 14px; display: block; color: #eee; }
24 | #HTMLReporter .runningAlert { background-color: #666666; }
25 | #HTMLReporter .skippedAlert { background-color: #aaaaaa; }
26 | #HTMLReporter .skippedAlert:first-child { background-color: #333333; }
27 | #HTMLReporter .skippedAlert:hover { text-decoration: none; color: white; text-decoration: underline; }
28 | #HTMLReporter .passingAlert { background-color: #a6b779; }
29 | #HTMLReporter .passingAlert:first-child { background-color: #5e7d00; }
30 | #HTMLReporter .failingAlert { background-color: #cf867e; }
31 | #HTMLReporter .failingAlert:first-child { background-color: #b03911; }
32 | #HTMLReporter .results { margin-top: 14px; }
33 | #HTMLReporter #details { display: none; }
34 | #HTMLReporter .resultsMenu, #HTMLReporter .resultsMenu a { background-color: #fff; color: #333333; }
35 | #HTMLReporter.showDetails .summaryMenuItem { font-weight: normal; text-decoration: inherit; }
36 | #HTMLReporter.showDetails .summaryMenuItem:hover { text-decoration: underline; }
37 | #HTMLReporter.showDetails .detailsMenuItem { font-weight: bold; text-decoration: underline; }
38 | #HTMLReporter.showDetails .summary { display: none; }
39 | #HTMLReporter.showDetails #details { display: block; }
40 | #HTMLReporter .summaryMenuItem { font-weight: bold; text-decoration: underline; }
41 | #HTMLReporter .summary { margin-top: 14px; }
42 | #HTMLReporter .summary .suite .suite, #HTMLReporter .summary .specSummary { margin-left: 14px; }
43 | #HTMLReporter .summary .specSummary.passed a { color: #5e7d00; }
44 | #HTMLReporter .summary .specSummary.failed a { color: #b03911; }
45 | #HTMLReporter .description + .suite { margin-top: 0; }
46 | #HTMLReporter .suite { margin-top: 14px; }
47 | #HTMLReporter .suite a { color: #333333; }
48 | #HTMLReporter #details .specDetail { margin-bottom: 28px; }
49 | #HTMLReporter #details .specDetail .description { display: block; color: white; background-color: #b03911; }
50 | #HTMLReporter .resultMessage { padding-top: 14px; color: #333333; }
51 | #HTMLReporter .resultMessage span.result { display: block; }
52 | #HTMLReporter .stackTrace { margin: 5px 0 0 0; max-height: 224px; overflow: auto; line-height: 18px; color: #666666; border: 1px solid #ddd; background: white; white-space: pre; }
53 | 
54 | #TrivialReporter { padding: 8px 13px; position: absolute; top: 0; bottom: 0; left: 0; right: 0; overflow-y: scroll; background-color: white; font-family: "Helvetica Neue Light", "Lucida Grande", "Calibri", "Arial", sans-serif; /*.resultMessage {*/ /*white-space: pre;*/ /*}*/ }
55 | #TrivialReporter a:visited, #TrivialReporter a { color: #303; }
56 | #TrivialReporter a:hover, #TrivialReporter a:active { color: blue; }
57 | #TrivialReporter .run_spec { float: right; padding-right: 5px; font-size: .8em; text-decoration: none; }
58 | #TrivialReporter .banner { color: #303; background-color: #fef; padding: 5px; }
59 | #TrivialReporter .logo { float: left; font-size: 1.1em; padding-left: 5px; }
60 | #TrivialReporter .logo .version { font-size: .6em; padding-left: 1em; }
61 | #TrivialReporter .runner.running { background-color: yellow; }
62 | #TrivialReporter .options { text-align: right; font-size: .8em; }
63 | #TrivialReporter .suite { border: 1px outset gray; margin: 5px 0; padding-left: 1em; }
64 | #TrivialReporter .suite .suite { margin: 5px; }
65 | #TrivialReporter .suite.passed { background-color: #dfd; }
66 | #TrivialReporter .suite.failed { background-color: #fdd; }
67 | #TrivialReporter .spec { margin: 5px; padding-left: 1em; clear: both; }
68 | #TrivialReporter .spec.failed, #TrivialReporter .spec.passed, #TrivialReporter .spec.skipped { padding-bottom: 5px; border: 1px solid gray; }
69 | #TrivialReporter .spec.failed { background-color: #fbb; border-color: red; }
70 | #TrivialReporter .spec.passed { background-color: #bfb; border-color: green; }
71 | #TrivialReporter .spec.skipped { background-color: #bbb; }
72 | #TrivialReporter .messages { border-left: 1px dashed gray; padding-left: 1em; padding-right: 1em; }
73 | #TrivialReporter .passed { background-color: #cfc; display: none; }
74 | #TrivialReporter .failed { background-color: #fbb; }
75 | #TrivialReporter .skipped { color: #777; background-color: #eee; display: none; }
76 | #TrivialReporter .resultMessage span.result { display: block; line-height: 2em; color: black; }
77 | #TrivialReporter .resultMessage .mismatch { color: black; }
78 | #TrivialReporter .stackTrace { white-space: pre; font-size: .8em; margin-left: 10px; max-height: 5em; overflow: auto; border: 1px inset red; padding: 1em; background: #eef; }
79 | #TrivialReporter .finished-at { padding-left: 1em; font-size: .6em; }
80 | #TrivialReporter.show-passed .passed, #TrivialReporter.show-skipped .skipped { display: block; }
81 | #TrivialReporter #jasmine_content { position: fixed; right: 100%; }
82 | #TrivialReporter .runner { border: 1px solid gray; display: block; margin: 5px 0; padding: 2px 0 2px 10px; }
83 | 


--------------------------------------------------------------------------------
/storage.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Get the storage object to use
  3 |  */
  4 | if (typeof(Storage) != "undefined") {
  5 |     // Yay, we have HTML5 local storage
  6 |     
  7 |     // add methods to allow storage of general objects (storing anything
  8 |     // other than strings may not work in all browsers)
  9 |     Storage.prototype.getObject = function(key) {
 10 |         var value = this.getItem(key);
 11 |         return value && JSON.parse(value);
 12 |     }
 13 | 
 14 |     Storage.prototype.setObject = function(key, value) {
 15 |         this.setItem(key, JSON.stringify(value));
 16 |     }
 17 |  }
 18 | 
 19 | function getStorage(backend) {
 20 |     // Create a databank and add some common prefixes
 21 |     var databank = createDatabank();   
 22 | 
 23 |     if(backend !== undefined) {
 24 |         return new ChatbotStorage(databank, backend);
 25 |     } else if (typeof(Storage) != "undefined") {
 26 |         return new ChatbotStorage(databank, new LocalStorage());
 27 |     } else {
 28 |         alert("no web storage, using Transient storage");
 29 |         return new ChatbotStorage(databank, new TransientStorage());
 30 |     }
 31 | }
 32 | 
 33 | function createDatabank() {
 34 |     // http://code.google.com/p/rdfquery/wiki/RdfPlugin
 35 |     return $.rdf.databank()
 36 |         .prefix('foaf', 'http://xmlns.com/foaf/0.1/')
 37 |         .prefix('dc', 'http://purl.org/dc/elements/1.1/')
 38 |         .prefix('dct', 'http://purl.org/dc/terms/')
 39 |         .prefix('sam', 'http://linklens.blogspot.com/'); 
 40 | }
 41 | 
 42 | function trim1 (str) {
 43 |     return str.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
 44 | }
 45 | 
 46 | function ChatbotStorage(db, backend) {
 47 |     this.databank = db;
 48 |     this.backend = backend;
 49 |     this.clearTranscript();
 50 | }
 51 | 
 52 | ChatbotStorage.prototype.getDatabank = function() {
 53 |     return this.databank;
 54 | }
 55 | 
 56 | ChatbotStorage.prototype.getTranscript = function() {
 57 |     return this.transcript;
 58 | }
 59 | 
 60 | ChatbotStorage.prototype.getKnowledgeBaseAsText = function() {
 61 |     return this.backend.getItem('rdf');
 62 | }
 63 | 
 64 | ChatbotStorage.prototype.isEmpty = function() {
 65 |     return this.backend.getItem('rdf') == undefined;
 66 | }
 67 | 
 68 | ChatbotStorage.prototype.clearDatabank = function() {
 69 |     this.databank = createDatabank();
 70 |     this.backend.setItem("rdf", "");
 71 | }
 72 | 
 73 | ChatbotStorage.prototype.storeEntity = function(object,name){
 74 |   name = name.replace(' ','_');
 75 |   this.getDatabank()
 76 |       .add(stringToResource(name) + ' a ' + quote(object))
 77 |       .add(stringToResource(name) + ' foaf:name ' + quote(name));
 78 | }
 79 | 
 80 | ChatbotStorage.prototype.storeProperty = function (object, relation, name){
 81 |   object = object.replace(' ','_');
 82 |   this.getDatabank()
 83 |       .add(stringToResource(object) + ' sam:' + relation + ' ' + quote(name));
 84 | }
 85 | 
 86 | ChatbotStorage.prototype.queryProperty = function (object, relation){
 87 |   object = object.replace(' ','_');
 88 |   var raw = $.rdf({databank:this.getDatabank()}).where('_:'+object+' sam:'+relation+' ?value').select(['value'])[0];
 89 |   if(raw === undefined){
 90 |     return undefined;
 91 |   }
 92 |   return { value: raw.value.value };
 93 | }
 94 | 
 95 | ChatbotStorage.prototype.queryAllProperties = function (object){
 96 |   object = object.replace(' ','_');
 97 |   var results = $.rdf({databank:this.getDatabank()}).where('_:'+object+' ?relation ?value').select(['relation','value']);
 98 |   var response = [];
 99 |   for(var i in results){
100 |     response.push({'name':results[i].value.value,'relation':results[i].relation.value.path.substring(1)});
101 |   }
102 |   return response;
103 | }
104 | 
105 | ChatbotStorage.prototype.queryEntity = function(name) {
106 |   // doing this because databank seems to introduce trailing space into name
107 |   // TODO contact the rdf project people to let them know
108 |   var raw = $.rdf({databank:this.getDatabank()}).where('_:'+name+' a ?type').select(['type'])[0];
109 |   if(raw === undefined){
110 |     return undefined;
111 |   }
112 |   var value = raw.type.value || "";
113 |   return { type: value.trim()};
114 | }
115 | 
116 | ChatbotStorage.prototype.clearTranscript = function() {
117 |     this.transcript = [ ];
118 | }
119 | 
120 | ChatbotStorage.prototype.addToTranscript = function(who, what) {
121 |     var entry =  { timestamp: new Date(), actor: who, text: what };
122 |     this.transcript.push(entry);
123 | }
124 | 
125 | ChatbotStorage.prototype.loadKnowledgeBaseFromString = function(turtle) {
126 |     this.databank.load(turtle, { format: 'text/turtle'});
127 | }
128 | 
129 | ChatbotStorage.prototype.load = function() {
130 |     var turtle = this.getKnowledgeBaseAsText();
131 |     if (turtle !== null) {
132 |         // trim any whitespace
133 |         turtle = trim1(turtle);
134 | 
135 |         // trim any surrounding double quotes
136 |         if (turtle.substring(0,1) === '"') {
137 |             turtle = turtle.substring(1, turtle.length-2);
138 |         }
139 |         this.databank.load(turtle, { format: 'text/turtle'});
140 |     }
141 | 
142 |     var ts = this.backend.getObject("transcript");
143 |     if (ts != null) {
144 |         this.transcript = ts;
145 |     }
146 | }
147 | 
148 | ChatbotStorage.prototype.save = function() {
149 |     var turtle = this.databank.dump({ format: 'text/turtle'});
150 |     this.backend.setItem("rdf", turtle);
151 |     this.backend.setObject("transcript", this.transcript);
152 | }
153 | 
154 | 
155 | /*
156 |  * Wrapper class using HTML5 storage. Need this because we can't seem to
157 |  * return localStorage from functions
158 |  */
function LocalStorage() {
    // Holds no state of its own: every method forwards to window.localStorage.
}

/* Forward to localStorage.getItem and hand back its result. */
LocalStorage.prototype.getItem = function(key) {
    var stored = localStorage.getItem(key);
    return stored;
};

/* Forward to localStorage.setItem; nothing is returned. */
LocalStorage.prototype.setItem = function(key, value) {
    localStorage.setItem(key, value);
};

/* Forward to localStorage.getObject and hand back its result. */
LocalStorage.prototype.getObject = function(key) {
    var decoded = localStorage.getObject(key);
    return decoded;
};

/* Forward to localStorage.setObject; nothing is returned. */
LocalStorage.prototype.setObject = function(key, value) {
    localStorage.setObject(key, value);
};
177 | 
178 | /*
179 |  * Fallback class to give us the illusion of storage if HTML5 storage is
180 |  * not available - works until we refresh or leave the page.
181 |  */
function TransientStorage() {
    // key -> value; lives only as long as this object does
    this.store = {};
}

/*
 * Return the value stored under `key`, or null when nothing was stored.
 * null (not undefined) matches what localStorage.getItem reports for a
 * missing key, so callers can treat both backends alike. Only own entries
 * count: names inherited from Object.prototype such as "constructor" are
 * not stored values.
 */
TransientStorage.prototype.getItem = function(key) {
    if (!Object.prototype.hasOwnProperty.call(this.store, key)) {
        return null;
    }
    var value = this.store[key];
    return value === undefined ? null : value;
};

/*
 * Store `value` under `key`. Unlike localStorage the value is kept as-is,
 * without conversion to a string.
 */
TransientStorage.prototype.setItem = function(key, value) {
    this.store[key] = value;
};

/*
 * Same as getItem: values are kept as-is, so no decoding is needed.
 */
TransientStorage.prototype.getObject = function(key) {
    return this.getItem(key);
};

/*
 * Same as setItem: values are kept as-is, so no encoding is needed.
 */
TransientStorage.prototype.setObject = function(key, value) {
    this.setItem(key, value);
};
201 | 
202 | 


--------------------------------------------------------------------------------
/h1-p/count_freqs.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | 
  3 | __author__="Daniel Bauer <bauer@cs.columbia.edu>"
  4 | __date__ ="$Sep 12, 2011"
  5 | 
  6 | import sys
  7 | from collections import defaultdict
  8 | import math
  9 | 
 10 | """
 11 | Count n-gram frequencies in a data file and write counts to
 12 | stdout. 
 13 | """
 14 | 
 15 | def simple_conll_corpus_iterator(corpus_file):
 16 |     """
 17 |     Get an iterator object over the corpus file. The elements of the
 18 |     iterator contain (word, ne_tag) tuples. Blank lines, indicating
 19 |     sentence boundaries return (None, None).
 20 |     """
 21 |     l = corpus_file.readline()
 22 |     while l:
 23 |         line = l.strip()
 24 |         if line: # Nonempty line
 25 |             # Extract information from line.
 26 |             # Each line has the format
 27 |             # word pos_tag phrase_tag ne_tag
 28 |             fields = line.split(" ")
 29 |             ne_tag = fields[-1]
 30 |             #phrase_tag = fields[-2] #Unused
 31 |             #pos_tag = fields[-3] #Unused
 32 |             word = " ".join(fields[:-1])
 33 |             yield word, ne_tag
 34 |         else: # Empty line
 35 |             yield (None, None)                        
 36 |         l = corpus_file.readline()
 37 | 
 38 | def sentence_iterator(corpus_iterator):
 39 |     """
 40 |     Return an iterator object that yields one sentence at a time.
 41 |     Sentences are represented as lists of (word, ne_tag) tuples.
 42 |     """
 43 |     current_sentence = [] #Buffer for the current sentence
 44 |     for l in corpus_iterator:        
 45 |             if l==(None, None):
 46 |                 if current_sentence:  #Reached the end of a sentence
 47 |                     yield current_sentence
 48 |                     current_sentence = [] #Reset buffer
 49 |                 else: # Got empty input stream
 50 |                     sys.stderr.write("WARNING: Got empty input file/stream.\n")
 51 |                     raise StopIteration
 52 |             else:
 53 |                 current_sentence.append(l) #Add token to the buffer
 54 | 
 55 |     if current_sentence: # If the last line was blank, we're done
 56 |         yield current_sentence  #Otherwise when there is no more token
 57 |                                 # in the stream return the last sentence.
 58 | 
 59 | def get_ngrams(sent_iterator, n):
 60 |     """
 61 |     Get a generator that returns n-grams over the entire corpus,
 62 |     respecting sentence boundaries and inserting boundary tokens.
 63 |     Sent_iterator is a generator object whose elements are lists
 64 |     of tokens.
 65 |     """
 66 |     for sent in sent_iterator:
 67 |          #Add boundary symbols to the sentence
 68 |          w_boundary = (n-1) * [(None, "*")]
 69 |          w_boundary.extend(sent)
 70 |          w_boundary.append((None, "STOP"))
 71 |          #Then extract n-grams
 72 |          ngrams = (tuple(w_boundary[i:i+n]) for i in xrange(len(w_boundary)-n+1))
 73 |          for n_gram in ngrams: #Return one n-gram at a time
 74 |             yield n_gram        
 75 | 
 76 | 
 77 | class Hmm(object):
 78 |     """
 79 |     Stores counts for n-grams and emissions. 
 80 |     """
 81 | 
 82 |     def __init__(self, n=3):
 83 |         assert n>=2, "Expecting n>=2."
 84 |         self.n = n
 85 |         self.emission_counts = defaultdict(int)
 86 |         self.ngram_counts = [defaultdict(int) for i in xrange(self.n)]
 87 |         self.all_states = set()
 88 | 
 89 |     def train(self, corpus_file):
 90 |         """
 91 |         Count n-gram frequencies and emission probabilities from a corpus file.
 92 |         """
 93 |         ngram_iterator = \
 94 |             get_ngrams(sentence_iterator(simple_conll_corpus_iterator(corpus_file)), self.n)
 95 | 
 96 |         for ngram in ngram_iterator:
 97 |             #Sanity check: n-gram we get from the corpus stream needs to have the right length
 98 |             assert len(ngram) == self.n, "ngram in stream is %i, expected %i" % (len(ngram, self.n))
 99 | 
100 |             tagsonly = tuple([ne_tag for word, ne_tag in ngram]) #retrieve only the tags            
101 |             for i in xrange(2, self.n+1): #Count NE-tag 2-grams..n-grams
102 |                 self.ngram_counts[i-1][tagsonly[-i:]] += 1
103 |             
104 |             if ngram[-1][0] is not None: # If this is not the last word in a sentence
105 |                 self.ngram_counts[0][tagsonly[-1:]] += 1 # count 1-gram
106 |                 self.emission_counts[ngram[-1]] += 1 # and emission frequencies
107 | 
108 |             # Need to count a single n-1-gram of sentence start symbols per sentence
109 |             if ngram[-2][0] is None: # this is the first n-gram in a sentence
110 |                 self.ngram_counts[self.n - 2][tuple((self.n - 1) * ["*"])] += 1
111 | 
112 |     def write_counts(self, output, printngrams=[1,2,3]):
113 |         """
114 |         Writes counts to the output file object.
115 |         Format:
116 | 
117 |         """
118 |         # First write counts for emissions
119 |         for word, ne_tag in self.emission_counts:            
120 |             output.write("%i WORDTAG %s %s\n" % (self.emission_counts[(word, ne_tag)], ne_tag, word))
121 | 
122 | 
123 |         # Then write counts for all ngrams
124 |         for n in printngrams:            
125 |             for ngram in self.ngram_counts[n-1]:
126 |                 ngramstr = " ".join(ngram)
127 |                 output.write("%i %i-GRAM %s\n" %(self.ngram_counts[n-1][ngram], n, ngramstr))
128 | 
129 |     def read_counts(self, corpusfile):
130 | 
131 |         self.n = 3
132 |         self.emission_counts = defaultdict(int)
133 |         self.ngram_counts = [defaultdict(int) for i in xrange(self.n)]
134 |         self.all_states = set()
135 | 
136 |         for line in corpusfile:
137 |             parts = line.strip().split(" ")
138 |             count = float(parts[0])
139 |             if parts[1] == "WORDTAG":
140 |                 ne_tag = parts[2]
141 |                 word = parts[3]
142 |                 self.emission_counts[(word, ne_tag)] = count
143 |                 self.all_states.add(ne_tag)
144 |             elif parts[1].endswith("GRAM"):
145 |                 n = int(parts[1].replace("-GRAM",""))
146 |                 ngram = tuple(parts[2:])
147 |                 self.ngram_counts[n-1][ngram] = count
148 |                 
149 | 
150 | 
def usage():
    """Write the command-line usage message to standard output."""
    # sys.stdout.write instead of the print statement: same bytes on stdout,
    # but valid syntax on both Python 2 and Python 3.
    sys.stdout.write("""
    python count_freqs.py [input_file] > [output_file]
        Read in a gene tagged training input file and produce counts.
    """ + "\n")
156 | 
if __name__ == "__main__":
    # Command-line entry point: read a tagged training file, print its counts.

    if len(sys.argv)!=2: # Expect exactly one argument: the training data file
        usage()
        sys.exit(2)

    try:
        corpus = open(sys.argv[1],"r")
    except IOError:
        # Report the path that was actually requested (the old message
        # referenced an undefined name and died with a NameError instead).
        sys.stderr.write("ERROR: Cannot read inputfile %s.\n" % sys.argv[1])
        sys.exit(1)

    try:
        # Initialize a trigram counter
        counter = Hmm(3)
        # Collect counts
        counter.train(corpus)
    finally:
        corpus.close()
    # Write the counts
    counter.write_counts(sys.stdout)
175 | 


--------------------------------------------------------------------------------
/thirdparty/removeStopWords.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * String method to remove stop words
  3 |  * Written by GeekLad http://geeklad.com
  4 |  * Stop words obtained from http://www.lextek.com/manuals/onix/stopwords1.html
  5 |  *   Usage: string_variable.removeStopWords();
  6 |  *   Output: The original String with stop words removed
  7 |  */
  8 | String.prototype.removeStopWords = function() {
  9 | 	var x;
 10 | 	var y;
 11 | 	var word;
 12 | 	var stop_word;
 13 | 	var regex_str;
 14 | 	var regex;
 15 | 	var cleansed_string = this.valueOf();
 16 | 	var stop_words = new Array(
 17 | 		'a',
 18 | 		'about',
 19 | 		'above',
 20 | 		'across',
 21 | 		'after',
 22 | 		'again',
 23 | 		'against',
 24 | 		'all',
 25 | 		'almost',
 26 | 		'alone',
 27 | 		'along',
 28 | 		'already',
 29 | 		'also',
 30 | 		'although',
 31 | 		'always',
 32 | 		'among',
 33 | 		'an',
 34 | 		'and',
 35 | 		'another',
 36 | 		'any',
 37 | 		'anybody',
 38 | 		'anyone',
 39 | 		'anything',
 40 | 		'anywhere',
 41 | 		'are',
 42 | 		'area',
 43 | 		'areas',
 44 | 		'around',
 45 | 		'as',
 46 | 		'ask',
 47 | 		'asked',
 48 | 		'asking',
 49 | 		'asks',
 50 | 		'at',
 51 | 		'away',
 52 | 		'b',
 53 | 		'back',
 54 | 		'backed',
 55 | 		'backing',
 56 | 		'backs',
 57 | 		'be',
 58 | 		'became',
 59 | 		'because',
 60 | 		'become',
 61 | 		'becomes',
 62 | 		'been',
 63 | 		'before',
 64 | 		'began',
 65 | 		'behind',
 66 | 		'being',
 67 | 		'beings',
 68 | 		'best',
 69 | 		'better',
 70 | 		'between',
 71 | 		'big',
 72 | 		'both',
 73 | 		'but',
 74 | 		'by',
 75 | 		'c',
 76 | 		'came',
 77 | 		'can',
 78 | 		'cannot',
 79 | 		'case',
 80 | 		'cases',
 81 | 		'certain',
 82 | 		'certainly',
 83 | 		'clear',
 84 | 		'clearly',
 85 | 		'come',
 86 | 		'could',
 87 | 		'd',
 88 | 		'did',
 89 | 		'differ',
 90 | 		'different',
 91 | 		'differently',
 92 | 		'do',
 93 | 		'does',
 94 | 		'done',
 95 | 		'down',
 96 | 		'down',
 97 | 		'downed',
 98 | 		'downing',
 99 | 		'downs',
100 | 		'during',
101 | 		'e',
102 | 		'each',
103 | 		'early',
104 | 		'either',
105 | 		'end',
106 | 		'ended',
107 | 		'ending',
108 | 		'ends',
109 | 		'enough',
110 | 		'even',
111 | 		'evenly',
112 | 		'ever',
113 | 		'every',
114 | 		'everybody',
115 | 		'everyone',
116 | 		'everything',
117 | 		'everywhere',
118 | 		'f',
119 | 		'face',
120 | 		'faces',
121 | 		'fact',
122 | 		'facts',
123 | 		'far',
124 | 		'felt',
125 | 		'few',
126 | 		'find',
127 | 		'finds',
128 | 		'first',
129 | 		'for',
130 | 		'four',
131 | 		'from',
132 | 		'full',
133 | 		'fully',
134 | 		'further',
135 | 		'furthered',
136 | 		'furthering',
137 | 		'furthers',
138 | 		'g',
139 | 		'gave',
140 | 		'general',
141 | 		'generally',
142 | 		'get',
143 | 		'gets',
144 | 		'give',
145 | 		'given',
146 | 		'gives',
147 | 		'go',
148 | 		'going',
149 | 		'good',
150 | 		'goods',
151 | 		'got',
152 | 		'great',
153 | 		'greater',
154 | 		'greatest',
155 | 		'group',
156 | 		'grouped',
157 | 		'grouping',
158 | 		'groups',
159 | 		'h',
160 | 		'had',
161 | 		'has',
162 | 		'have',
163 | 		'having',
164 | 		'he',
165 | 		'her',
166 | 		'here',
167 | 		'herself',
168 | 		'high',
169 | 		'high',
170 | 		'high',
171 | 		'higher',
172 | 		'highest',
173 | 		'him',
174 | 		'himself',
175 | 		'his',
176 | 		'how',
177 | 		'however',
178 | 		'i',
179 | 		'if',
180 | 		'important',
181 | 		'in',
182 | 		'interest',
183 | 		'interested',
184 | 		'interesting',
185 | 		'interests',
186 | 		'into',
187 | 		'is',
188 | 		'it',
189 | 		'its',
190 | 		'itself',
191 | 		'j',
192 | 		'just',
193 | 		'k',
194 | 		'keep',
195 | 		'keeps',
196 | 		'kind',
197 | 		'knew',
198 | 		'know',
199 | 		'known',
200 | 		'knows',
201 | 		'l',
202 | 		'large',
203 | 		'largely',
204 | 		'last',
205 | 		'later',
206 | 		'latest',
207 | 		'least',
208 | 		'less',
209 | 		'let',
210 | 		'lets',
211 | 		'like',
212 | 		'likely',
213 | 		'long',
214 | 		'longer',
215 | 		'longest',
216 | 		'm',
217 | 		'made',
218 | 		'make',
219 | 		'making',
220 | 		'man',
221 | 		'many',
222 | 		'may',
223 | 		'me',
224 | 		'member',
225 | 		'members',
226 | 		'men',
227 | 		'might',
228 | 		'more',
229 | 		'most',
230 | 		'mostly',
231 | 		'mr',
232 | 		'mrs',
233 | 		'much',
234 | 		'must',
235 | 		'my',
236 | 		'myself',
237 | 		'n',
238 | 		'necessary',
239 | 		'need',
240 | 		'needed',
241 | 		'needing',
242 | 		'needs',
243 | 		'never',
244 | 		'new',
245 | 		'new',
246 | 		'newer',
247 | 		'newest',
248 | 		'next',
249 | 		'no',
250 | 		'nobody',
251 | 		'non',
252 | 		'noone',
253 | 		'not',
254 | 		'nothing',
255 | 		'now',
256 | 		'nowhere',
257 | 		'number',
258 | 		'numbers',
259 | 		'o',
260 | 		'of',
261 | 		'off',
262 | 		'often',
263 | 		'old',
264 | 		'older',
265 | 		'oldest',
266 | 		'on',
267 | 		'once',
268 | 		'one',
269 | 		'only',
270 | 		'open',
271 | 		'opened',
272 | 		'opening',
273 | 		'opens',
274 | 		'or',
275 | 		'order',
276 | 		'ordered',
277 | 		'ordering',
278 | 		'orders',
279 | 		'other',
280 | 		'others',
281 | 		'our',
282 | 		'out',
283 | 		'over',
284 | 		'p',
285 | 		'part',
286 | 		'parted',
287 | 		'parting',
288 | 		'parts',
289 | 		'per',
290 | 		'perhaps',
291 | 		'place',
292 | 		'places',
293 | 		'point',
294 | 		'pointed',
295 | 		'pointing',
296 | 		'points',
297 | 		'possible',
298 | 		'present',
299 | 		'presented',
300 | 		'presenting',
301 | 		'presents',
302 | 		'problem',
303 | 		'problems',
304 | 		'put',
305 | 		'puts',
306 | 		'q',
307 | 		'quite',
308 | 		'r',
309 | 		'rather',
310 | 		'really',
311 | 		'right',
312 | 		'right',
313 | 		'room',
314 | 		'rooms',
315 | 		's',
316 | 		'said',
317 | 		'same',
318 | 		'saw',
319 | 		'say',
320 | 		'says',
321 | 		'second',
322 | 		'seconds',
323 | 		'see',
324 | 		'seem',
325 | 		'seemed',
326 | 		'seeming',
327 | 		'seems',
328 | 		'sees',
329 | 		'several',
330 | 		'shall',
331 | 		'she',
332 | 		'should',
333 | 		'show',
334 | 		'showed',
335 | 		'showing',
336 | 		'shows',
337 | 		'side',
338 | 		'sides',
339 | 		'since',
340 | 		'small',
341 | 		'smaller',
342 | 		'smallest',
343 | 		'so',
344 | 		'some',
345 | 		'somebody',
346 | 		'someone',
347 | 		'something',
348 | 		'somewhere',
349 | 		'state',
350 | 		'states',
351 | 		'still',
352 | 		'still',
353 | 		'such',
354 | 		'sure',
355 | 		't',
356 | 		'take',
357 | 		'taken',
358 | 		'than',
359 | 		'that',
360 | 		'the',
361 | 		'their',
362 | 		'them',
363 | 		'then',
364 | 		'there',
365 | 		'therefore',
366 | 		'these',
367 | 		'they',
368 | 		'thing',
369 | 		'things',
370 | 		'think',
371 | 		'thinks',
372 | 		'this',
373 | 		'those',
374 | 		'though',
375 | 		'thought',
376 | 		'thoughts',
377 | 		'three',
378 | 		'through',
379 | 		'thus',
380 | 		'to',
381 | 		'today',
382 | 		'together',
383 | 		'too',
384 | 		'took',
385 | 		'toward',
386 | 		'turn',
387 | 		'turned',
388 | 		'turning',
389 | 		'turns',
390 | 		'two',
391 | 		'u',
392 | 		'under',
393 | 		'until',
394 | 		'up',
395 | 		'upon',
396 | 		'us',
397 | 		'use',
398 | 		'used',
399 | 		'uses',
400 | 		'v',
401 | 		'very',
402 | 		'w',
403 | 		'want',
404 | 		'wanted',
405 | 		'wanting',
406 | 		'wants',
407 | 		'was',
408 | 		'way',
409 | 		'ways',
410 | 		'we',
411 | 		'well',
412 | 		'wells',
413 | 		'went',
414 | 		'were',
415 | 		'what',
416 | 		'when',
417 | 		'where',
418 | 		'whether',
419 | 		'which',
420 | 		'while',
421 | 		'who',
422 | 		'whole',
423 | 		'whose',
424 | 		'why',
425 | 		'will',
426 | 		'with',
427 | 		'within',
428 | 		'without',
429 | 		'work',
430 | 		'worked',
431 | 		'working',
432 | 		'works',
433 | 		'would',
434 | 		'x',
435 | 		'y',
436 | 		'year',
437 | 		'years',
438 | 		'yet',
439 | 		'you',
440 | 		'young',
441 | 		'younger',
442 | 		'youngest',
443 | 		'your',
444 | 		'yours',
445 | 		'z'
446 | 	)
447 | 
448 | 	// Split out all the individual words in the phrase
449 | 	words = cleansed_string.match(/[^\s]+|\s+[^\s+]$/g)
450 | 
451 | 	// Review all the words
452 | 	for(x=0; x < words.length; x++) {
453 | 		// For each word, check all the stop words
454 | 		for(y=0; y < stop_words.length; y++) {
455 | 			// Get the current word
456 | 			word = words[x].replace(/\s+|[^a-z]+/ig, "");	// Trim the word and remove non-alpha
457 | 
458 | 			// Get the stop word
459 | 			stop_word = stop_words[y];
460 | 
461 | 			// If the word matches the stop word, remove it from the keywords
462 | 			if(word.toLowerCase() == stop_word) {
463 | 				// Build the regex
464 | 				regex_str = "^\\s*"+stop_word+"\\s*$";		// Only word
465 | 				regex_str += "|^\\s*"+stop_word+"\\s+";		// First word
466 | 				regex_str += "|\\s+"+stop_word+"\\s*$";		// Last word
467 | 				regex_str += "|\\s+"+stop_word+"\\s+";		// Word somewhere in the middle
468 | 				regex = new RegExp(regex_str, "ig");
469 | 
470 | 				// Remove the word from the keywords
471 | 				cleansed_string = cleansed_string.replace(regex, " ");
472 | 			}
473 | 		}
474 | 	}
475 | 	return cleansed_string.replace(/^\s+|\s+$/g, "");
476 | }


--------------------------------------------------------------------------------
/h1-p/viterbi.js:
--------------------------------------------------------------------------------
  1 | // should use a gist - use API to grab data from that
  2 | // or trying to grab it from my file system - 
  3 | // but either way we are now testing something that doesn't operate immediately
  4 | // so we have to have a special type of test - have to look up testing 
  5 | // ajax http calls in jasmine
  6 | function grab() {
  7 | $.get('http://127.0.0.1/~sam/Github/faqbot/h1-p/gene.train.head', function(data) {
  8 |     $('#result').text(data);
  9 |     // so issue here is that we'd be quite happy to block waiting for this data
 10 |     // having pulled it in a single time ...
 11 | }, "text");
 12 | }
 13 | 
 14 | // so now we are pulling in data from file system - could pull in larger file?
 15 | // test will be slow .. so? could be separate test ... not sure how we can 
 16 | // write results out ... just dump to browser?
 17 | // need some other kind of interface to use system other than testing one ...
 18 | // just like we have in faqbot ...
 19 | 
 20 | function callAjax(callback,filename) {
 21 | 	return $.ajax({
 22 | 	    url: "/~sam/Github/faqbot/h1-p/"+filename,
 23 | 	    success: callback
 24 | 	});
 25 | }
 26 | 
 27 | // could have been testing this at a much lower level?
 28 | function count(data){
 29 | 	// Comparison O
 30 | 	var word_tags = new Hash({},0);
 31 | 	var grams = new Hash({1:{},2:{},3:{}},0);
 32 | 	var lines = data.split('\n');
 33 | 	var word, category; // could start with category being *, increment grams, and then ...
 34 | 	var category_minus_one = '*';
 35 | 	var category_minus_two = '*';
 36 | 	for(var i in lines){
 37 | 		//debugger
 38 | 		tokens = lines[i].split(' ');
 39 | 		word = tokens[0];
 40 | 		category = tokens[1]; // e.g. 'O' or 'I-GENE'
 41 | 		if(word === ''){ // is this our sentence break identifier
 42 | 			category = 'STOP';
 43 | 		}
 44 | 		else{
 45 | 			word_tags.set([word,category],word_tags.get([word,category])+1)
 46 | 		}
 47 |     grams.set([1,category], grams.get([1,category])+1);
 48 |     grams.set([2,category_minus_one,category], grams.get([2,category_minus_one,category])+1);
 49 |     grams.set([3,category_minus_two,category_minus_one,category], grams.get([3,category_minus_two,category_minus_one,category])+1);
 50 | 
 51 |     if(category === 'STOP'){
 52 |     	category_minus_one = '*';
 53 | 	    category_minus_two = '*';
 54 | 	    grams.set([2,category_minus_two,category_minus_one], grams.get([2,category_minus_two,category_minus_one])+1);// HACK!!!!
 55 |     }
 56 |     else{
 57 |       category_minus_two = category_minus_one;
 58 |     	category_minus_one = category;
 59 |     }
 60 | 	}
 61 | 	return {'grams':grams, 'word_tags':word_tags};
 62 | }
 63 | 
 64 | function emission(word,category,word_tags,grams){
 65 |   var numerator = word_tags.get([word,category]);
 66 |   var denominator = grams.get(['1',category]);
 67 |   if(denominator == 0 ){
 68 | 		return 0;
 69 | 	}
 70 |   return numerator/denominator;
 71 | }
 72 | 
 73 | function conditionalTrigramProbability(z,x,y,grams){
 74 | 	var numerator = grams.get(['3',x,y,z]);
 75 | 	var denominator = grams.get(['2',x,y]);
 76 | 	if(denominator == 0 ){
 77 | 		return 0;
 78 | 	}
 79 | 	return numerator/denominator;
 80 | }
 81 | 
 82 | function rarify(data,rareSymbol,rareThreshold){
 83 |    // NOTE THIS IS CHANGING UNDERLYING DATA STRUCTURE ... 
 84 |    var word_tags = data.word_tags;
 85 |    //debugger
 86 | 
 87 |    // seems like we should initialize the rare keyword
 88 |    // although rare should never be one I guess - makes no sense ...
 89 |    // TODO keys method for Hash object?
 90 |    for(var word in word_tags.hash){
 91 |    	  var sum = 0;
 92 |    	  for(var category in word_tags.get([word])){
 93 |    	  	 sum+= word_tags.get([word,category]);
 94 |    	  }
 95 |    	  if(sum<rareThreshold){
 96 |    	  	 for(var category in word_tags.get([word])){
 97 |    	  	    word_tags.set([rareSymbol,category],word_tags.get([rareSymbol,category]) + word_tags.get([word,category]));
 98 |    	     }
 99 |    	  	 word_tags.delete([word]);
100 |    	  }
101 |    }
102 |    data.word_tags = word_tags;
103 |    return data;
104 | }
105 | 
/**
 * Returns the tags allowed at a sentence position, as an object whose keys
 * are the tags (all values are undefined; callers enumerate the keys).
 *
 * @param {number|string} position 0-based word index; -1 and -2 are the two
 *   padding slots before the sentence.
 * @returns {Object} {'*'} for the padding slots, otherwise
 *   {'STOP', 'I-GENE', 'O'} in that enumeration order.
 */
function getSet(position){
	// Loose comparison on purpose: callers may pass the index as a string.
	var isPaddingSlot = (position == -1 || position == -2);
	if(!isPaddingSlot){
		return {'STOP':undefined,'I-GENE':undefined,'O':undefined};
	}
	return {'*':undefined};
}
112 | 
/**
 * Finds the highest-scoring tag sequence for a sentence with the trigram
 * Viterbi recurrence
 *   pi(k,u,v) = max_w pi(k-1,w,u) * q(v|w,u) * e(x_k|v)
 * and the final step max_(u,v) pi(n,u,v) * q(STOP|u,v).
 *
 * The textbook algorithm numbers words 1..n with padding at -1 and 0; here
 * words are numbered 0..n-1, so the padding positions are -2 and -1.
 *
 * A word whose emission estimate is 0 for a tag falls back to the estimate
 * stored for "_RARE_" under that tag.
 *
 * @param {string} sentence Words separated by single spaces.
 * @param {{word_tags: Hash, grams: Hash}} result Counts, as produced by
 *   count() and optionally rarify().
 * @returns {{tag_sequence: Object, max: number}} `tag_sequence` maps each
 *   word index (0..n-1) to its tag; `max` is the score of that sequence.
 */
function viterbi(sentence,result){
	var word_tags = result.word_tags;
	var grams = result.grams;

	var pi = new Hash(); // best score of any tag sequence ending in tags u, v at position k
	var bp = new Hash(); // back-pointer: the w that achieved pi(k,u,v)
	// Initialization: the padding before the sentence has score 1.
	pi.set([-1,'*','*'],1);

	var words = sentence.split(' ');
	var n = words.length; // length of sentence
	var k, u, v, w;
	var best, best_w, score, p_emission;

	for(k = 0; k < n; k++){
		// For u in S_k-1, v in S_k
		for(u in getSet(k-1)){
			for(v in getSet(k)){
				// The emission term does not depend on w, so look it up once.
				p_emission = emission(words[k],v,word_tags,grams);
				if (p_emission === 0) {p_emission = emission("_RARE_",v,word_tags,grams);}

				best = 0;
				best_w = null;
				for(w in getSet(k-2)){
					score = pi.get([k-1,w,u]) * conditionalTrigramProbability(v,w,u,grams) * p_emission;
					// >= so that, on ties, the last candidate enumerated wins.
					if(score >= best){
						best = score;
						best_w = w;
					}
				}
				pi.set([k,u,v],best);
				bp.set([k,u,v],best_w);
			}
		}
	}

	// Final step: max over (u,v) of pi(n-1,u,v) * q(STOP|u,v)
	var max = 0;
	var y = {};
	for(u in getSet(n-2)){
		for(v in getSet(n-1)){
			score = pi.get([n-1,u,v]) * conditionalTrigramProbability('STOP',u,v,grams);
			if(score >= max){
				max = score;
				// For a one-word sentence position n-2 is padding ('*'),
				// not a word, so it is left out of the result.
				if(n >= 2){
					y[n-2] = u;
				}
				y[n-1] = v;
			}
		}
	}

	// Follow the back-pointers: y_k = bp(k+2, y_k+1, y_k+2)
	for(k = n-3; k >= 0; k--){
		y[k] = bp.get([k+2,y[k+1],y[k+2]]);
	}

	return {tag_sequence:y,max:max};
}
193 | 
/**
 * Tags each word of a one-word-per-line text with the tag that has the
 * highest emission ratio, count(word, tag) / count(tag).
 *
 * A word with no entry in the counts is scored as `rareSymbol` instead.
 * Empty lines are passed through unchanged.
 *
 * @param {string} devData Newline-separated words.
 * @param {{word_tags: Hash, grams: Hash}} result Counts to score with.
 * @param {string} rareSymbol Stand-in key for unseen words.
 * @returns {string} The lines as "<word> <tag>", joined with newlines and
 *   trimmed.
 */
function tag(devData, result, rareSymbol){
	var tagCounts = result.word_tags;

	var tagged = devData.split('\n').map(function(line){
		if(line === ''){
			return line;
		}
		// a lookup that yields 0 means the word was never counted
		var lookupWord = (tagCounts.get([line]) === 0) ? rareSymbol : line;

		var bestScore = 0;
		var bestTag = '';
		for(var candidate in tagCounts.get([lookupWord])){
			var score = tagCounts.get([lookupWord,candidate])/result.grams.get([1,candidate]);
			if(score > bestScore){
				bestScore = score;
				bestTag = candidate;
			}
		}
		return (line+' '+bestTag).trim();
	});

	return tagged.join('\n').trim();
}
222 | 
223 | /*
224 | 
225 | 1 WORDTAG O mind
226 | 20 WORDTAG O resting
227 | 1 WORDTAG I-GENE SOX
228 | 2 WORDTAG I-GENE holoenzyme
229 | 2 WORDTAG I-GENE hydrolase
230 | 2 WORDTAG I-GENE barley
231 | 2 WORDTAG O glotticq
232 | 
233 | */
234 | 


--------------------------------------------------------------------------------
/h1-p/ViterbiSpec.js:
--------------------------------------------------------------------------------
  1 | describe("Viterbi", function() {
  2 |   var sentences = [];
  3 |   var answers = [];
  4 |   var gene_train_head = "";
  5 | 
  6 |   var rareKeyword = '_RARE_'
  7 | 
  8 |   var gene_train_head_direct = "Comparison O\n\
  9 | Comparison O\n\
 10 | with O\n\
 11 | alkaline I-GENE\n\
 12 | phosphatases I-GENE\n\
 13 | and O\n\
 14 | 5 I-GENE\n\
 15 | - I-GENE\n\
 16 | nucleotidase I-GENE\n\
 17 | \n\
 18 | Pharmacologic O\n";
 19 | // at the moment these all get assigned I-GENE as our micro training data has
 20 | // no words that aren't rare to I-GENE and O are given equal prob., and the first
 21 | // category (I-GENE) is assigned to everything ...
 22 | // a better test would use a lower threshold
 23 | var gene_key_head_direct = "BACKGROUND I-GENE\n\
 24 | : I-GENE\n\
 25 | Ischemic I-GENE\n\
 26 | heart I-GENE\n\
 27 | disease I-GENE\n\
 28 | is I-GENE\n\
 29 | the I-GENE\n\
 30 | primary I-GENE\n\
 31 | cause I-GENE\n\
 32 | of O\n";
 33 | 
 34 |   // what's the approach to test ajax calls in jasmine
 35 |   //$.get('http://127.0.0.1/~sam/Github/faqbot/h1-p/gene.train.head', function(data) {
 36 |     // so issue here is that we'd be quite happy to block waiting for this data
 37 |     // having pulled it in a single time ...
 38 |     // TODO make sure this blocks before we get to specs
 39 |     //gene_train_head = data;
 40 | 
 41 |   //}, "text");
 42 | 
 43 |   describe("counting", function () {
 44 |     var callback,trainingData,devData,keyData;
 45 | 
 46 |     beforeEach(function() {
 47 |       callback = jasmine.createSpy();
 48 |       callAjax(callback,'gene.train.head2');
 49 |       waitsFor(function() {
 50 |           return callback.callCount > 0;
 51 |       });
 52 |       runs(function() {
 53 |         trainingData = callback.mostRecentCall.args[0];
 54 |      });
 55 | 
 56 |       callback2 = jasmine.createSpy();
 57 |       callAjax(callback2,'gene.dev.head');
 58 |       waitsFor(function() {
 59 |           return callback2.callCount > 0;
 60 |       });
 61 |       runs(function() {
 62 |         devData = callback2.mostRecentCall.args[0];
 63 |       });
 64 | 
 65 |       callback3 = jasmine.createSpy();
 66 |       callAjax(callback3,'gene.key.head');
 67 |       waitsFor(function() {
 68 |           return callback3.callCount > 0;
 69 |       });
 70 |       runs(function() {
 71 |         keyData = callback3.mostRecentCall.args[0];
 72 |       });
 73 | 
 74 |     });
 75 | 
 76 |     it("should be able to generate the correct frequency counts", function() {
 77 |       var result = count(trainingData);
 78 |       var word_tags = result.word_tags;
 79 |       //debugger
 80 |       // this is a subset of the correct counts ...
 81 |       expect(word_tags.get(['Comparison','O'])).toEqual(2);
 82 |       expect(word_tags.get(['Pharmacologic','O'])).toEqual(1);
 83 |       expect(word_tags.get(['and','O'])).toEqual(1);
 84 |       expect(word_tags.get(['with','O'])).toEqual(1);
 85 |       expect(word_tags.get(['alkaline','I-GENE'])).toEqual(1);
 86 |       expect(word_tags.get(['phosphatases','I-GENE'])).toEqual(1);
 87 |       expect(word_tags.get(['5','I-GENE'])).toEqual(1);
 88 |       expect(word_tags.get(['-','I-GENE'])).toEqual(1);
 89 |       expect(word_tags.get(['nucleotidase','I-GENE'])).toEqual(1);
 90 | 
 91 |       // note the above is checking the old small training set, but we we are pulling in a bigger chunk now
 92 |       
 93 |       var grams = result.grams;
 94 |       expect(grams.get(['1','O'])).toEqual(43);
 95 |       expect(grams.get(['1','I-GENE'])).toEqual(5);
 96 |       expect(grams.get(['2','*','*'])).toEqual(3);
 97 |       expect(grams.get(['2','*','O'])).toEqual(3);
 98 |       expect(grams.get(['2','*','I-GENE'])).toEqual(0);
 99 |       expect(grams.get(['2','O','I-GENE'])).toEqual(2);
100 |       expect(grams.get(['2','O','O'])).toEqual(39);
101 |       expect(grams.get(['2','I-GENE','O'])).toEqual(1);
102 |       expect(grams.get(['2','I-GENE','I-GENE'])).toEqual(3);
103 |       expect(grams.get(['3','*','*','*'])).toEqual(0);
104 |       expect(grams.get(['3','*','*','O'])).toEqual(3);
105 |       expect(grams.get(['3','*','O','O'])).toEqual(3);
106 |       expect(grams.get(['3','O','O','O'])).toEqual(36);
107 |     });
108 | 
109 |     it("should be able to generate the correct frequency counts with infrequent cutoff", function() {
110 |       var result = rarify(count(trainingData),'_RARE_',5);
111 |       var word_tags = result.word_tags;
112 |       //debugger
113 |       // this is a subset of the correct counts ...
114 |       expect(word_tags.get(['Comparison'])).toEqual(0);
115 |       expect(word_tags.get(['Pharmacologic'])).toEqual(0);
116 |       expect(word_tags.get(['and'])).toEqual(0);
117 |       expect(word_tags.get(['with'])).toEqual(0);
118 |       expect(word_tags.get(['alkaline'])).toEqual(0);
119 |       expect(word_tags.get(['phosphatases'])).toEqual(0);
120 |       expect(word_tags.get(['5'])).toEqual(0);
121 |       expect(word_tags.get(['-'])).toEqual(0);
122 |       expect(word_tags.get(['nucleotidase'])).toEqual(0);
123 |       expect(word_tags.get(['_RARE_','O'])).toEqual(43);
124 |       expect(word_tags.get(['_RARE_','I-GENE'])).toEqual(5);
125 |       // should also be checking that words are deleted? or actually 
126 |       // what is the behaviour we want here?
127 |       
128 |       var grams = result.grams;
129 |       expect(grams.get(['1','O'])).toEqual(43);
130 |       expect(grams.get(['1','I-GENE'])).toEqual(5);
131 |     });
132 | 
133 |     it("should be able to read in the dev file and tag it", function() {
134 |        var c = count(trainingData);
135 |        var result = rarify(c,rareKeyword,2);
136 |        expect(tag(devData, result, rareKeyword)).toEqual(gene_key_head_direct.trim());
137 |     });
138 | 
139 |     it("should be able to calculate HMM Conditional Trigram probabilities", function() {
140 |        grams = new Hash({1:{},2:{},3:{}},0);
141 |        grams.set(['3','*','*','O'],0.0);
142 |        grams.set(['2','*','*'],0.0);
143 |        expect(conditionalTrigramProbability('O','*','*',grams)).toEqual(0);
144 |        grams.set(['3','*','*','O'],0.1);
145 |        grams.set(['2','*','*'],0.5);
146 |        expect(conditionalTrigramProbability('O','*','*',grams)).toEqual(0.2);
147 |     });
148 | 
149 |     it("should be able to calculate HMM Trigram probabilities given a count object", function() {
150 |        var c = count(trainingData);
151 |        var grams = c.grams;
152 |        expect(conditionalTrigramProbability('O','*','*',grams)).toEqual(grams.get(['3','*','*','O'])/grams.get(['2','*','*']));
153 |        expect(conditionalTrigramProbability('I-GENE','*','*',grams)).toEqual(0);
154 |        expect(conditionalTrigramProbability('I-GENE','I-GENE','*',grams)).toEqual(0);
155 |        expect(conditionalTrigramProbability('STOP','I-GENE','I-GENE',grams)).toEqual(1/3);
156 |     });
157 | 
158 |     it("should be able to calculate emission probabilities", function() {
159 |        var c = count(trainingData);
160 |        var grams = c.grams;
161 |        var word_tags = c.word_tags;
162 |        expect(emission('Comparison','O', word_tags, grams)).toEqual(2/43);
163 |        expect(emission('Blah','O', word_tags, grams)).toEqual(0);
164 |        expect(emission('alkaline','I-GENE', word_tags, grams)).toEqual(0.2);
165 |        expect(emission('Comparison','I-GENE', word_tags, grams)).toEqual(0);
166 |        expect(emission('alkaline','O', word_tags, grams)).toEqual(0);
167 |        // TODO would be good to be checking for divide by zero
168 |        grams.set(['1','O'],0);
169 |        expect(emission('Comparison','O', word_tags, grams)).toEqual(0);
170 |     });
171 | 
172 |     it("should be able to compute the viterbi algorithm", function() {
173 |        var c = count(trainingData);
174 |        var result = rarify(c,rareKeyword,2);
175 |        var result2 = viterbi("Comparison with alkaline",result);
176 |        expect(result2.tag_sequence).toEqual({0:'O',1:'O',2:'O'});
177 |        expect(result2.max).toEqual(0.001296748609757334);
178 |        // not sure if the above are actually correct, but they are at least sensible
179 |        // Next step is to see what tagging we get on the gene.dev set
180 | 
181 |     });
182 | 
183 |     it("should be able to get appropriate sets for possible tags at each position in a sentence", function() {
184 |        expect(getSet(-1)).toEqual({'*':undefined});
185 |        expect(getSet(0)).toEqual({'*':undefined});
186 |        expect(getSet(1)).toEqual({'O':undefined,'I-GENE':undefined,'STOP':undefined});
187 |        expect(getSet(100)).toEqual({'O':undefined,'I-GENE':undefined,'STOP':undefined});
188 | 
189 |     });
190 | 
191 |   });
192 | 
193 | 
194 |   describe("ajax", function () {
195 |     var callback,data;
196 | 
197 |     beforeEach(function() {
198 |       callback = jasmine.createSpy();
199 |       callAjax(callback,'gene.train.head');
200 |       waitsFor(function() {
201 |           return callback.callCount > 0;
202 |       });
203 |       runs(function() {
204 |         data = callback.mostRecentCall.args[0];
205 |       });
206 |     });
207 | 
208 |     it("should make a real AJAX request", function () {
209 |       runs(function() {
210 |         expect(callback.mostRecentCall.args[0]).toEqual(gene_train_head_direct);
211 |       });
212 |     });
213 |   });
214 | 
215 | 
216 | 
217 |   
218 | 
219 | 
220 | 
221 | 
222 | });
223 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.uri.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * $ URIs @VERSION
  3 |  * 
  4 |  * Copyright (c) 2008,2009 Jeni Tennison
  5 |  * Licensed under the MIT (MIT-LICENSE.txt)
  6 |  *
  7 |  */
  8 | /**
  9 |  * @fileOverview $ URIs
 10 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
 11 |  * @copyright (c) 2008,2009 Jeni Tennison
 12 |  * @license MIT license (MIT-LICENSE.txt)
 13 |  * @version 1.0
 14 |  */
 15 | /**
 16 |  * @class
 17 |  * @name jQuery
 18 |  * @exports $ as jQuery
 19 |  * @description rdfQuery is a <a href="http://jquery.com/">jQuery</a> plugin. The only fields and methods listed here are those that come as part of the rdfQuery library.
 20 |  */
 21 | (function ($) {
 22 | 
 23 |   var
 24 |     mem = {},
 25 |     uriRegex = /^(([a-z][\-a-z0-9+\.]*):)?(\/\/([^\/?#]+))?([^?#]*)?(\?([^#]*))?(#(.*))?$/i,
 26 |     docURI,
 27 | 
 28 |     parseURI = function (u) {
 29 |       var m = u.match(uriRegex);
 30 |       if (m === null) {
 31 |         throw "Malformed URI: " + u;
 32 |       }
 33 |       return {
 34 |         scheme: m[1] ? m[2].toLowerCase() : undefined,
 35 |         authority: m[3] ? m[4] : undefined,
 36 |         path: m[5] || '',
 37 |         query: m[6] ? m[7] : undefined,
 38 |         fragment: m[8] ? m[9] : undefined
 39 |       };
 40 |     },
 41 | 
 42 |     removeDotSegments = function (u) {
 43 |       var r = '', m = [];
 44 |       if (/\./.test(u)) {
 45 |         while (u !== undefined && u !== '') {
 46 |           if (u === '.' || u === '..') {
 47 |             u = '';
 48 |           } else if (/^\.\.\//.test(u)) { // starts with ../
 49 |             u = u.substring(3);
 50 |           } else if (/^\.\//.test(u)) { // starts with ./
 51 |             u = u.substring(2);
 52 |           } else if (/^\/\.(\/|$)/.test(u)) { // starts with /./ or consists of /.
 53 |             u = '/' + u.substring(3);
 54 |           } else if (/^\/\.\.(\/|$)/.test(u)) { // starts with /../ or consists of /..
 55 |             u = '/' + u.substring(4);
 56 |             r = r.replace(/\/?[^\/]+$/, '');
 57 |           } else {
 58 |             m = u.match(/^(\/?[^\/]*)(\/.*)?$/);
 59 |             u = m[2];
 60 |             r = r + m[1];
 61 |           }
 62 |         }
 63 |         return r;
 64 |       } else {
 65 |         return u;
 66 |       }
 67 |     },
 68 | 
 69 |     merge = function (b, r) {
 70 |       if (b.authority !== '' && (b.path === undefined || b.path === '')) {
 71 |         return '/' + r;
 72 |       } else {
 73 |         return b.path.replace(/[^\/]+$/, '') + r;
 74 |       }
 75 |     };
 76 | 
 77 |   /**
 78 |    * Creates a new jQuery.uri object. This should be invoked as a method rather than constructed using new.
 79 |    * @class Represents a URI
 80 |    * @param {String} [relative='']
 81 |    * @param {String|jQuery.uri} [base] Defaults to the base URI of the page
 82 |    * @returns {jQuery.uri} The new jQuery.uri object.
 83 |    * @example uri = jQuery.uri('/my/file.html');
 84 |    */
 85 |   $.uri = function (relative, base) {
 86 |     var uri;
 87 |     relative = relative || '';
 88 |     if (mem[relative]) {
 89 |       return mem[relative];
 90 |     }
 91 |     base = base || $.uri.base();
 92 |     if (typeof base === 'string') {
 93 |       base = $.uri.absolute(base);
 94 |     }
 95 |     uri = new $.uri.fn.init(relative, base);
 96 |     if (mem[uri]) {
 97 |       return mem[uri];
 98 |     } else {
 99 |       mem[uri] = uri;
100 |       return uri;
101 |     }
102 |   };
103 | 
104 |   $.uri.fn = $.uri.prototype = {
105 |     /**
106 |      * The scheme used in the URI
107 |      * @type String
108 |      */
109 |     scheme: undefined,
110 |     /**
111 |      * The authority used in the URI
112 |      * @type String
113 |      */
114 |     authority: undefined,
115 |     /**
116 |      * The path used in the URI
117 |      * @type String
118 |      */
119 |     path: undefined,
120 |     /**
121 |      * The query part of the URI
122 |      * @type String
123 |      */
124 |     query: undefined,
125 |     /**
126 |      * The fragment part of the URI
127 |      * @type String
128 |      */
129 |     fragment: undefined,
130 |     
131 |     init: function (relative, base) {
132 |       var r = {};
133 |       base = base || {};
134 |       $.extend(this, parseURI(relative));
135 |       if (this.scheme === undefined) {
136 |         this.scheme = base.scheme;
137 |         if (this.authority !== undefined) {
138 |           this.path = removeDotSegments(this.path);
139 |         } else {
140 |           this.authority = base.authority;
141 |           if (this.path === '') {
142 |             this.path = base.path;
143 |             if (this.query === undefined) {
144 |               this.query = base.query;
145 |             }
146 |           } else {
147 |             if (!/^\//.test(this.path)) {
148 |               this.path = merge(base, this.path);
149 |             }
150 |             this.path = removeDotSegments(this.path);
151 |           }
152 |         }
153 |       }
154 |       if (this.scheme === undefined) {
155 |         throw "Malformed URI: URI is not an absolute URI and no base supplied: " + relative;
156 |       }
157 |       return this;
158 |     },
159 |   
160 |     /**
161 |      * Resolves a relative URI relative to this URI
162 |      * @param {String} relative
163 |      * @returns jQuery.uri
164 |      */
165 |     resolve: function (relative) {
166 |       return $.uri(relative, this);
167 |     },
168 |     
169 |     /**
170 |      * Creates a relative URI giving the path from this URI to the absolute URI passed as a parameter
171 |      * @param {String|jQuery.uri} absolute
172 |      * @returns String
173 |      */
174 |     relative: function (absolute) {
175 |       var aPath, bPath, i = 0, j, resultPath = [], result = '';
176 |       if (typeof absolute === 'string') {
177 |         absolute = $.uri(absolute, {});
178 |       }
179 |       if (absolute.scheme !== this.scheme || 
180 |           absolute.authority !== this.authority) {
181 |         return absolute.toString();
182 |       }
183 |       if (absolute.path !== this.path) {
184 |         aPath = absolute.path.split('/');
185 |         bPath = this.path.split('/');
186 |         if (aPath[1] !== bPath[1]) {
187 |           result = absolute.path;
188 |         } else {
189 |           while (aPath[i] === bPath[i]) {
190 |             i += 1;
191 |           }
192 |           j = i;
193 |           for (; i < bPath.length - 1; i += 1) {
194 |             resultPath.push('..');
195 |           }
196 |           for (; j < aPath.length; j += 1) {
197 |             resultPath.push(aPath[j]);
198 |           }
199 |           result = resultPath.join('/');
200 |         }
201 |         result = absolute.query === undefined ? result : result + '?' + absolute.query;
202 |         result = absolute.fragment === undefined ? result : result + '#' + absolute.fragment;
203 |         return result;
204 |       }
205 |       if (absolute.query !== undefined && absolute.query !== this.query) {
206 |         return '?' + absolute.query + (absolute.fragment === undefined ? '' : '#' + absolute.fragment);
207 |       }
208 |       if (absolute.fragment !== undefined && absolute.fragment !== this.fragment) {
209 |         return '#' + absolute.fragment;
210 |       }
211 |       return '';
212 |     },
213 |   
214 |     /**
215 |      * Returns the URI as an absolute string
216 |      * @returns String
217 |      */
218 |     toString: function () {
219 |       var result = '';
220 |       if (this._string) {
221 |         return this._string;
222 |       } else {
223 |         result = this.scheme === undefined ? result : (result + this.scheme + ':');
224 |         result = this.authority === undefined ? result : (result + '//' + this.authority);
225 |         result = result + this.path;
226 |         result = this.query === undefined ? result : (result + '?' + this.query);
227 |         result = this.fragment === undefined ? result : (result + '#' + this.fragment);
228 |         this._string = result;
229 |         return result;
230 |       }
231 |     }
232 |   
233 |   };
234 | 
  // URI objects are created with `new $.uri.fn.init(...)`; sharing the
  // prototype gives those instances the members of $.uri.fn.
  $.uri.fn.init.prototype = $.uri.fn;
236 | 
237 |   /**
238 |    * Creates a {@link jQuery.uri} from a known-to-be-absolute URI
239 |    * @param {String}
240 |    * @returns {jQuery.uri}
241 |    */
242 |   $.uri.absolute = function (uri) {
243 |     return $.uri(uri, {});
244 |   };
245 | 
246 |   /**
247 |    * Creates a {@link jQuery.uri} from a relative URI and an optional base URI
248 |    * @returns {jQuery.uri}
249 |    * @see jQuery.uri
250 |    */
251 |   $.uri.resolve = function (relative, base) {
252 |     return $.uri(relative, base);
253 |   };
254 |   
255 |   /**
256 |    * Creates a string giving the relative path from a base URI to an absolute URI
257 |    * @param {String} absolute
258 |    * @param {String} base
259 |    * @returns {String}
260 |    */
261 |   $.uri.relative = function (absolute, base) {
262 |     return $.uri(base, {}).relative(absolute);
263 |   };
264 |   
265 |   /**
266 |    * Returns the base URI of the page
267 |    * @returns {jQuery.uri}
268 |    */
269 |   $.uri.base = function () {
270 |     return $(document).base();
271 |   };
272 |   
273 |   /**
274 |    * Returns the base URI in scope for the first selected element
275 |    * @methodOf jQuery#
276 |    * @name jQuery#base
277 |    * @returns {jQuery.uri}
278 |    * @example baseURI = $('img').base();
279 |    */
280 |   $.fn.base = function () {
281 |     var base = $(this).parents().andSelf().find('base').attr('href'),
282 |       doc = $(this)[0].ownerDocument || document,
283 |       docURI = $.uri.absolute(doc.location === null ? document.location.href : doc.location.href);
284 |     return base === undefined ? docURI : $.uri(base, docURI);
285 |   };
286 | 
287 | })(jQuery);
288 | 


--------------------------------------------------------------------------------
/query.js:
--------------------------------------------------------------------------------
 // get the object we'll use for persistent storage
 // (read as a global by showResponse, handleCommand and handleChat)
var storage = getStorage();

// load the saved state, or the initial knowledge base when storage is empty
initStorage(storage);
  5 | 
  6 | function query(storage, sentence) {
  7 |     // check for sentence word by word in list (hashtable)
  8 |     var words = sentence.split(" ");
  9 |     var lookup = "";
 10 |     var prepend = "";
 11 |     // seems like persistence is the key step here ...
 12 |     // dialogue history to a gist
 13 |     // can we check changes to the knowledge base directly into github ...
 14 |     // then we would get versioning .... 
 15 |     // would be nice to have local stub of that to allow for testing and working when off the grid ...
 16 |     // if we stick stuff in a json file in github what happens to our query speed? worry about optimizing that later ...
 17 |     // gradually load more and more of that data into the memory of the browser in the background as the human is typing so that 
 18 |     // we can do immediate in memory search on the JSON data?
 19 |     
 20 |     // things to do here
 21 |     // 1. grab first response from google on query on what user said
 22 |     // --- need way to grab URL of request from Google
 23 |     // 2. process assertions, e.g. 'there is a course called Mobile Design & Programming'
 24 |     // --- that relies on some form of regex and also persistence
 25 |     // 3. process queries, e.g. what is the start date of Mobile Design & Programming
 26 |     // --- want to be able to check for bigrams/trigrams having removed stopwords
 27 |     // 4. want to log transcript to some persistence store
 28 |     // 5. could add eliza/twss code?
 29 |     // 6. could add joke of the day code - looks like we can't due to cross-server scripting constraint
 30 |     // 7. chuck norris code might work
 31 | 
 32 | 
 33 |     // so perhaps we could just create a json structure to reflect the assertion ...
 34 |     // I guess ultimately we really want that flexible parse structure to handle
 35 |     // a) Mobile is a new course
 36 |     // b) I heard that there's a new course called Mobile
 37 |     // c) Have you signed up for that new Mobile course?
 38 |     var match = matchEntityAssertionRegex(sentence);
 39 |     // want to check is match is undefined or not ...
 40 |     var response = "OK";
 41 | 
 42 |     if(match !== null){
 43 |         response = match.name + ' is a ' +match.object;
 44 |         
 45 |         /* bit ugly using the name as identifier, might be better
 46 |            to use something like a GUID to represent new entities
 47 |            and name them using a separate foaf:name triple. However, then
 48 |            we'd need a way to recognise existing entities.
 49 |         */
 50 |         storage.storeEntity(match.object,match.name)
 51 | 
 52 |         // _:John a "person" ; foaf:name "John"    
 53 |         // _:John _:favourite_colour "blue" ; foaf:name "blue"    
 54 |         // _:favourite_color type_of_relation "between people"  ????  
 55 | 
 56 |         // "John" a "person" ???
 57 |         // foaf:name
 58 |         // foaf:type ?
 59 | 
 60 |     }
 61 |     else {
 62 |       var properties_match = matchPropertiesRegex(sentence);
 63 | 
 64 |       if( properties_match !== null){
 65 |          storage.storeProperty(properties_match.object, properties_match.relation, properties_match.name);
 66 |         return "The " + properties_match.relation +" for " + properties_match.object + " is " + properties_match.name;
 67 |       }
 68 |       else{
 69 |         response = handleQuestion(storage,sentence);
 70 |       }
 71 |         
 72 |     }
 73 |        
 74 |     /*
 75 |             $.getJSON("http://www.joke-db.com/widgets/src/wp/clean/monkey/123?callback=?",null
 76 |               {
 77 |         url: 
 78 |     }).done(function ( data ) {
 79 |         console.log("test");
 80 |       if( console && console.log ) {
 81 |         console.log("Sample of data:", data);
 82 |       }
 83 |     }); */
 84 |     // $.icndb.getRandomJoke(12) // this was for chuck norris
 85 |     return response;
 86 | }
 87 | 
 88 | function matchEntityAssertionRegex(sentence) {
 89 |     // Using named capture and flag x (free-spacing and line comments)
 90 |     var assert = XRegExp('(?<assert>  (T|t)here(\\si|\')s\\sa ) \\s?  # assert  \n' +
 91 |                          '(?<object> .* ) \\s  # object \n' +
 92 |                          '(?<called> called ) \\s?  # called \n' +
 93 |                          '(?<name>   .* )     # name     ', 'x');
 94 |     return XRegExp.exec(sentence, assert);  
 95 | }
 96 | 
 97 | function matchPropertiesRegex(sentence){
 98 |   //Unreal Engine has a website http://unrealengine.com  ---> _:Unreal_Engine has_a_website http://unrealengine.com
 99 |   //Unreal Engine's website is http://unrealengine.com
100 |   var assert = XRegExp('(?<object> .+ ) \\s  # object \n' +
101 |                      '(?<has_a> has\\sa ) \\s  # has_a \n' +
102 |                      '(?<relation> .+ ) \\s  # relation \n' +
103 |                      '(?<name>   .+ )     # name     ', 'x');
104 |   return XRegExp.exec(sentence, assert);
105 | }
106 | 
// TODO add this to String itself e.g. String.prototype.removeStopWords = function()
/**
 * Drops every character that is neither a word character (letter, digit,
 * underscore) nor whitespace.
 * @param {String} sentence
 * @returns {String} the sentence without punctuation
 */
function removePunctuation(sentence){
    var punctuation = /[^\w\s]/g;
    var stripped = sentence.replace(punctuation, '');
    return stripped;
}
111 | 
/**
 * Lists the candidate entity names in a sentence: its words (punctuation and
 * stop words removed) followed by every bigram joined with an underscore.
 * @param {String} sentence
 * @returns {Array} single words, then bigrams such as "Unreal_Engine"
 */
function getPossibleEntities(sentence){
    var cleaned = removePunctuation(sentence); // could get this function in String itself
    var words = cleaned.removeStopWords().split(' ');
    var bigrams = natural.NGrams.bigrams(words);
    var i;
    // indexed loop: for...in would also visit enumerable additions to
    // Array.prototype made by other scripts
    for (i = 0; i < bigrams.length; i += 1) {  // e.g. "Unreal Engine"
      words.push(bigrams[i].join('_'));  // e.g. "Unreal_Engine"
    }
    return words;
}
121 | 
/**
 * Answers a question from the knowledge base.
 *
 * The first candidate entity (see getPossibleEntities) that the storage knows
 * decides the answer. If one of its relations is itself mentioned in the
 * sentence, only that relation is reported; otherwise the entity's type and
 * all of its relations are listed.
 *
 * @param {Object} storage knowledge base; queryEntity and queryAllProperties
 *   are called on it
 * @param {String} sentence the question
 * @returns {String} the answer, or 'why?' when no known entity is mentioned
 */
function handleQuestion(storage, sentence) {
    // now this really needs refactoring!!!
    var response = 'why?';
    var words = getPossibleEntities(sentence);
    var hasOwn = Object.prototype.hasOwnProperty;
    var i, nr, result, obj, allProps, relation, name;
    // TODO return all other relations for that thing, e.g. website etc.
    for (i = 0; i < words.length; i += 1) {
      // _:John a ?type
      result = storage.queryEntity(words[i]);
      if (result === undefined) {
        continue;
      }
      obj = words[i].replace('_', ' ');
      // to query a specific relation we have to look for all possible relations
      // and see if any match any of the other words in the sentence
      //storage.queryProperty(name,relation);
      allProps = storage.queryAllProperties(obj);
      response = "I know that " + obj + " is a " + result.type;
      for (nr in allProps) {
        if (!hasOwn.call(allProps, nr)) {
          continue; // ignore members inherited through the prototype chain
        }
        relation = allProps[nr].relation;
        name = allProps[nr].name;
        // skip the type itself and the foaf bookkeeping relations
        if ((result.type != name) && (relation.indexOf("foaf") == -1)) {
          if (words.indexOf(relation) !== -1) {
            // the user asked about this relation: answer just that
            response = "The " + relation + " for " + obj + " is " + name;
            break;
          }
          response += " and " + relation + " for " + obj + " is " + name;
        }
      }
      // only the first known entity is answered
      break;
    }
    return response;
}
157 | 
158 | 
/**
 * Appends one line of dialogue to the on-page history (div#history).
 * The sentence is inserted as markup, not as plain text.
 * @param {String} who 'bot' or 'human'; also used as the CSS class of the line
 * @param {String} sentence the text (or markup) to show
 */
function updateHistory(who, sentence) {
  // any other speaker gets no label
  var label = who == 'bot' ? 'Bot: ' : (who == 'human' ? 'You: ' : '');
  var line = '<span class="' + who + '">' + label + sentence + '</span><br/>';
  $("div#history").append(line);
}
170 | 
/**
 * Records an utterance in the stored transcript (global `storage`) and then
 * shows it in the on-page history.
 * @param {String} who the speaker
 * @param {String} what the utterance
 */
function showResponse(who, what) {
  var speaker = who, utterance = what;
  storage.addToTranscript(speaker, utterance);
  updateHistory(speaker, utterance);
}
175 | 
/*
* handle commands to the bot that should not appear in the transcript or
* affect the KB.
*
* Returns true when the sentence was a command (and has been executed) and
* false when it was not, so callers can tell the two cases apart.
*/
function handleCommand(sentence) {
  if (sentence == 'show kb') {
      alert(storage.getKnowledgeBaseAsText());
      return true;
  }
  if (sentence == 'show transcript') {
      alert(storage.getTranscript());
      return true;
  }
  return false; // was not a command
}
189 | 
/**
 * Handles one line typed by the user: bot commands are executed silently,
 * anything else is shown, answered and saved.
 * @param {String} sentence what the user typed
 * @returns {Boolean} always false
 */
function handleChat(sentence) {
  // Only an explicit `false` means "not a command"; any other result
  // (including undefined) means the command was handled and must stay out
  // of the transcript and the knowledge base.
  if (handleCommand(sentence) !== false) {
      return false;
  }

  showResponse('human', sentence + "<br/>");
  showResponse('bot', query(storage, sentence) + "<br/>");
  storage.save();

  return false;
}
199 | 
200 | 
// not functional yet - just Sam playing around with github access
// TODO move this to storage.js
/**
 * Experimental: reads initial_kb.json from the tansaku/faqbot repository,
 * appends "new data" to its content and writes it back.
 */
function storageGithub(){
  var github = new Github({
    token: "OAUTH_TOKEN",
    auth: "oauth"
  });
  var repo = github.getRepo('tansaku','faqbot');
  repo.read('master', 'initial_kb.json', function(err, data) {
    if (err) {
      return; // nothing was read, so there is nothing to write back
    }
    // keep the repo handle intact: the new content is built from the data
    // that was read, not from the repo object itself
    var updated = data + "new data";
    repo.write('master', 'initial_kb.json', updated, 'new data', function(err) {});
  });
}
213 | 
214 | 
215 | // If storage is empty (this is the first time we are called) then
216 | // add some basic knowledge 
/**
 * Prepares the storage: a non-empty storage is loaded; an empty one is
 * filled from initial_kb.txt (turtle format) and then saved.
 * @param {Object} storage the persistent storage object
 */
function initStorage(storage) {
  if (!storage.isEmpty()) {
      storage.load();
      return;
  }
  // load the initial knowledge base from a text file in turtle format
  var onLoaded = function(turtle) {
      storage.loadKnowledgeBaseFromString(turtle);
      storage.save();
  };
  $.get('initial_kb.txt', onLoaded, 'text');
}
229 | 
/**
 * Replays the stored transcript into the on-page history, oldest entry first.
 * @param {Object} storage storage whose getTranscript() yields entries with
 *   `actor` and `text` members
 */
function showTranscript(storage) {
  var entries = storage.getTranscript();
  var index = 0;
  var entry;
  while (index < entries.length) {
      // TODO: show timestamps for old chats
      entry = entries[index];
      updateHistory(entry.actor, entry.text);
      index += 1;
  }
}
239 | 
/**
 * Turns a string into a blank-node style resource name by prefixing "_:".
 * The string itself is used unchanged (no escaping is applied).
 */
function stringToResource(s) {
  var blankNodePrefix = '_:';
  return blankNodePrefix + s;
}
244 | 
/**
 * Wraps a string in double quotes; the content is not escaped.
 */
function quote(s) {
  var mark = '"';
  return mark + s + mark;
}
248 | 
249 | 
250 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.xmlns.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * jQuery CURIE @VERSION
  3 |  * 
  4 |  * Copyright (c) 2008,2009 Jeni Tennison
  5 |  * Licensed under the MIT (MIT-LICENSE.txt)
  6 |  *
  7 |  * Depends:
  8 |  *  jquery.uri.js
  9 |  */
 10 | /**
 11 |  * @fileOverview XML Namespace processing
 12 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
 13 |  * @copyright (c) 2008,2009 Jeni Tennison
 14 |  * @license MIT license (MIT-LICENSE.txt)
 15 |  * @version 1.0
 16 |  * @requires jquery.uri.js
 17 |  */
 18 | 
 19 | /*global jQuery */
 20 | (function ($) {
 21 | 
 22 |   var 
 23 |     xmlNs = 'http://www.w3.org/XML/1998/namespace',
 24 |     xmlnsNs = 'http://www.w3.org/2000/xmlns/',
 25 |     
 26 |     xmlnsRegex = /\sxmlns(?::([^ =]+))?\s*=\s*(?:"([^"]*)"|'([^']*)')/g,
 27 |     
 28 |     ncNameChar = '[-A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u10000-\uEFFFF\.0-9\u00B7\u0300-\u036F\u203F-\u2040]',
 29 |     ncNameStartChar = '[\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F
18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3\u4E00-\u9FA5\u3007\u3021-\u3029_]',
 30 |     ncNameRegex = new RegExp('^' + ncNameStartChar + ncNameChar + '*$');
 31 |     
 32 | 
 33 | /**
 34 |  * Returns the namespaces declared in the scope of the first selected element, or
 35 |  * adds a namespace declaration to all selected elements. Pass in no parameters
 36 |  * to return all namespaces bindings on the first selected element. If only 
 37 |  * the prefix parameter is specified, this method will return the namespace
 38 |  * URI that is bound to the specified prefix on the first element in the selection
 39 |  * If the prefix and uri parameters are both specified, this method will
 40 |  * add the binding of the specified prefix and namespace URI to all elements
 41 |  * in the selection.
 42 |  * @methodOf jQuery#
 43 |  * @name jQuery#xmlns
 44 |  * @param {String} [prefix] Restricts the namespaces returned to only the namespace with the specified namespace prefix.
 45 |  * @param {String|jQuery.uri} [uri] Adds a namespace declaration to the selected elements that maps the specified prefix to the specified namespace.
 46 |  * @param {Object} [inherited] A map of inherited namespace bindings.
 47 |  * @returns {Object|jQuery.uri|jQuery}
 48 |  * @example 
 49 |  * // Retrieve all of the namespace bindings on the HTML document element
 50 |  * var nsMap = $('html').xmlns();
 51 |  * @example
 52 |  * // Retrieve the namespace URI mapped to the 'dc' prefix on the HTML document element
 53 |  * var dcNamespace = $('html').xmlns('dc');
 54 |  * @example
 55 |  * // Create a namespace declaration that binds the 'dc' prefix to the URI 'http://purl.org/dc/elements/1.1/'
 56 |  * $('html').xmlns('dc', 'http://purl.org/dc/elements/1.1/');
 57 |  */
 58 |   $.fn.xmlns = function (prefix, uri, inherited) {
 59 |     var 
 60 |       elem = this.eq(0),
 61 |       ns = elem.data('xmlns'),
 62 |       e = elem[0], a, p, i,
 63 |       decl = prefix ? 'xmlns:' + prefix : 'xmlns',
 64 |       value,
 65 |       tag, found = false;
 66 |     if (uri === undefined) {
 67 |       if (prefix === undefined) { // get the in-scope declarations on the first element
 68 |         if (!ns) {
 69 |           ns = {
 70 | //            xml: $.uri(xmlNs)
 71 |           };
 72 |           if (e.attributes && e.attributes.getNamedItemNS) {
 73 |             for (i = 0; i < e.attributes.length; i += 1) {
 74 |               a = e.attributes[i];
 75 |               if (/^xmlns(:(.+))?$/.test(a.nodeName)) {
 76 |                 prefix = /^xmlns(:(.+))?$/.exec(a.nodeName)[2] || '';
 77 |                 value = a.nodeValue;
 78 |                 if (prefix === '' || (value !== '' && value !== xmlNs && value !== xmlnsNs && ncNameRegex.test(prefix) && prefix !== 'xml' && prefix !== 'xmlns')) {
 79 |                   ns[prefix] = $.uri(a.nodeValue);
 80 |                   found = true;
 81 |                 }
 82 |               }
 83 |             }
 84 |           } else {
 85 |             tag = /<[^>]+>/.exec(e.outerHTML);
 86 |             a = xmlnsRegex.exec(tag);
 87 |             while (a !== null) {
 88 |               prefix = a[1] || '';
 89 |               value = a[2] || a[3];
 90 |               if (prefix === '' || (value !== '' && value !== xmlNs && value !== xmlnsNs && ncNameRegex.test(prefix) && prefix !== 'xml' && prefix !== 'xmlns')) {
 91 |                 ns[prefix] = $.uri(a[2] || a[3]);
 92 |                 found = true;
 93 |               }
 94 |               a = xmlnsRegex.exec(tag);
 95 |             }
 96 |             xmlnsRegex.lastIndex = 0;
 97 |           }
 98 |           inherited = inherited || (e.parentNode.nodeType === 1 ? elem.parent().xmlns() : {});
 99 |           ns = found ? $.extend({}, inherited, ns) : inherited;
100 |           elem.data('xmlns', ns);
101 |         }
102 |         return ns;
103 |       } else if (typeof prefix === 'object') { // set the prefix mappings defined in the object
104 |         for (p in prefix) {
105 |           if (typeof prefix[p] === 'string' && ncNameRegex.test(p)) {
106 |             this.xmlns(p, prefix[p]);
107 |           }
108 |         }
109 |         this.find('*').andSelf().removeData('xmlns');
110 |         return this;
111 |       } else { // get the in-scope declaration associated with this prefix on the first element
112 |         if (!ns) {
113 |           ns = elem.xmlns();
114 |         }
115 |         return ns[prefix];
116 |       }
117 |     } else { // set
118 |       this.find('*').andSelf().removeData('xmlns');
119 |       return this.attr(decl, uri);
120 |     }
121 |   };
122 | 
/**
 * Removes one or more XML namespace bindings from the selected elements and
 * clears the cached bindings of the selection and its descendants.
 * @methodOf jQuery#
 * @name jQuery#removeXmlns
 * @param {String|Object|String[]} prefix The prefix(es) of the XML namespace bindings that are to be removed from the selected elements.
 * @returns {jQuery} The original jQuery object.
 * @example
 * // Remove the foaf namespace declaration from the body element:
 * $('body').removeXmlns('foaf');
 * @example
 * // Remove the foo and bar namespace declarations from all h2 elements
 * $('h2').removeXmlns(['foo', 'bar']);
 * @example
 * // Remove the foo and bar namespace declarations from all h2 elements
 * var namespaces = { foo : 'http://www.example.org/foo', bar : 'http://www.example.org/bar' };
 * $('h2').removeXmlns(namespaces);
 */
  $.fn.removeXmlns = function (prefix) {
    var key, idx;
    if (typeof prefix !== 'object') {
      // a single prefix; an empty one means the default namespace
      this.removeAttr(prefix ? 'xmlns:' + prefix : 'xmlns');
    } else if (prefix.length !== undefined) {
      // an array of prefixes
      for (idx = 0; idx < prefix.length; idx += 1) {
        this.removeXmlns(prefix[idx]);
      }
    } else {
      // a prefix-to-namespace map: only its string-valued keys are used
      for (key in prefix) {
        if (typeof prefix[key] === 'string') {
          this.removeXmlns(key);
        }
      }
    }
    this.find('*').andSelf().removeData('xmlns');
    return this;
  };
161 | 
162 |   $.fn.qname = function (name) {
163 |     var m, prefix, namespace;
164 |     if (name === undefined) {
165 |       if (this[0].outerHTML === undefined) {
166 |         name = this[0].nodeName.toLowerCase();
167 |       } else {
168 |         name = /<([^ >]+)/.exec(this[0].outerHTML)[1].toLowerCase();
169 |       }
170 |     }
171 |     if (name === '?xml:namespace') {
172 |       // there's a prefix on the name, but we can't get at it
173 |       throw "XMLinHTML: Unable to get the prefix to resolve the name of this element";
174 |     }
175 |     m = /^(([^:]+):)?([^:]+)$/.exec(name);
176 |     prefix = m[2] || '';
177 |     namespace = this.xmlns(prefix);
178 |     if (namespace === undefined && prefix !== '') {
179 |       throw "MalformedQName: The prefix " + prefix + " is not declared";
180 |     }
181 |     return {
182 |       namespace: namespace,
183 |       localPart: m[3],
184 |       prefix: prefix,
185 |       name: name
186 |     };
187 |   };
188 | 
189 | })(jQuery);
190 | 


--------------------------------------------------------------------------------
/h1-p/eval_gene_tagger.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | 
  3 | __author__="Daniel Bauer <bauer@cs.columbia.edu>"
  4 | __date__ ="$Sep 29, 2011"
  5 | 
  6 | import sys
  7 | 
  8 | 
  9 | """
 10 | Evaluate gene tagger output by comparing it to a gold standard file.
 11 | 
 12 | Running the script on your tagger output like this
 13 | 
 14 |     python eval_gene_tagger.py gene_dev.key your_tagger_output.dat
 15 | 
 16 | will generate a table of results like this:
 17 | 
 18 |     Found 14071 GENES. Expected 5942 GENES; Correct: 3120.
 19 | 
 20 | 		 precision 	recall 		F1-Score
 21 |     GENE:	 0.433367	0.231270	0.301593
 22 | 
 23 | Adopted from original named entity evaluation.
 24 | 
 25 | """
 26 | 
 27 | def corpus_iterator(corpus_file, with_logprob = False):
 28 |     """
 29 |     Get an iterator object over the corpus file. The elements of the
 30 |     iterator contain (word, ne_tag) tuples. Blank lines, indicating
 31 |     sentence boundaries return (None, None).
 32 |     """
 33 |     l = corpus_file.readline()    
 34 |     tagfield = with_logprob and -2 or -1
 35 | 
 36 |     try:
 37 |         while l:
 38 |             line = l.strip()
 39 |             if line: # Nonempty line
 40 |                 # Extract information from line.
 41 |                 # Each line has the format
 42 |                 # word ne_tag [log_prob]
 43 |                 fields = line.split(" ")
 44 |                 ne_tag = fields[tagfield]
 45 |                 word = " ".join(fields[:tagfield])
 46 |                 yield word, ne_tag
 47 |             else: # Empty line
 48 |                 yield (None, None)
 49 |             l = corpus_file.readline()
 50 |     except IndexError:
 51 |         sys.stderr.write("Could not read line: \n")
 52 |         sys.stderr.write("\n%s" % line)
 53 |         if with_logprob:
 54 |             sys.stderr.write("Did you forget to output log probabilities in the prediction file?\n")
 55 |         sys.exit(1)
 56 | 
 57 | 
 58 | class NeTypeCounts(object):
 59 |     """
 60 |     Stores true/false positive/negative counts for each NE type.
 61 |     """
 62 | 
 63 |     def __init__(self):
 64 |         self.tp = 0
 65 |         self.fp = 0
 66 |         self.tn = 0
 67 |         self.fn = 0 
 68 | 
 69 |     def get_precision(self):
 70 |         return self.tp / float(self.tp + self.fp)
 71 | 
 72 |     def get_recall(self):
 73 |         return self.tp / float(self.tp + self.fn)
 74 | 
 75 |     def get_accuracy(self):
 76 |         return (self.tp + self.tn) / float(self.tp + self.tn + self.fp + self.fn)
 77 | 
 78 | 
 79 | class Evaluator(object):
 80 |     """
 81 |     Stores global true/false positive/negative counts. 
 82 |     """
 83 | 
 84 | 
 85 |     ne_classes = ["GENE"]
 86 | 
 87 |     def __init__(self):        
 88 |         self.tp = 0
 89 |         self.tn = 0
 90 |         self.fp = 0        
 91 |         self.fn = 0
 92 | 
 93 |         # Initialize an object that counts true/false positives/negatives
 94 |         # for each NE class
 95 |         self.class_counts = {}
 96 |         for c in self.ne_classes:
 97 |             self.class_counts[c] = NeTypeCounts()
 98 | 
 99 |     def compare(self, gold_standard, prediction):
100 |         """
101 |         Compare the prediction against a gold standard. Both objects must be
102 |         generator or iterator objects that return a (word, ne_tag) tuple at a
103 |         time.
104 |         """
105 | 
106 |         # Define a couple of tags indicating the status of each stream
107 |         curr_pred_type = None # prediction stream was previously in a named entity
108 |         curr_pred_start = None # a new prediction starts at the current token
109 |         curr_gs_type = None   # prediction stream was previously in a named entity
110 |         curr_gs_start = None # a new prediction starts at the current token
111 | 
112 |         total = 0
113 |         for gs_word, gs_tag in gold_standard: # Move through the gold standard stream
114 |             pred_word, pred_tag = prediction.next() # Get the corresponding item from the prediction stream
115 |             
116 |             # Make sure words in both files match up
117 |             if gs_word != pred_word:
118 |                 sys.stderr.write("Could not align gold standard and predictions in line %i.\n" % (total+1))
119 |                 sys.stderr.write("Gold standard: %s  Prediction file: %s\n" % (gs_word, pred_word))
120 |                 sys.exit(1)        
121 | 
122 |             # Split off the I and B tags
123 |             gs_type = gs_tag==None and "O" or gs_tag.split("-")[-1]
124 |             pred_type = pred_tag==None and "O" or pred_tag.split("-")[-1]                        
125 | 
126 |             # Check if a named entity ends here in either stream.
127 |             # This is the case if we are currently in an entity and either
128 |             #   - end of sentence
129 |             #   - current word is marked O
130 |             #   - new entity starts (B - or I with different NE type)
131 |             pred_ends = curr_pred_type!=None and ((pred_tag==None or pred_tag[0] in "OB") or (curr_pred_type!=pred_type and pred_tag[0]=="I"))
132 |             gs_ends = curr_gs_type!=None and ((gs_tag==None or gs_tag[0] in "OB") or (curr_gs_type!=gs_type and gs_tag[0]=="I"))
133 |             
134 | 
135 |             # Check if a named entity starts here in either stream.
136 |             # This is tha case if this is not the end of a sentence and
137 |             #   - This is not the end of a sentence
138 |             #   - New entity starts (B, I after O or at begining of sentence or
139 |             #       I with different NE type) 
140 |             if pred_word!=None:
141 |                 pred_start = (pred_tag!=None and pred_tag[0] == "B") or (curr_pred_type==None and pred_tag[0]=="I") or \
142 |                     (curr_pred_type!=None and curr_pred_type!=pred_type and pred_tag.startswith("I"))
143 |                 gs_starts = (gs_tag!=None and gs_tag[0] == "B") or (curr_gs_type==None and gs_tag[0]=="I") or \
144 |                     (curr_gs_type!=None and curr_gs_type!=gs_type and gs_tag.startswith("I"))
145 |             else:
146 |                 pred_start = False
147 |                 gs_starts = False            
148 | 
149 |             #For debugging:
150 |             #print pred_word, gs_tag, pred_tag, pred_ends, gs_ends, pred_start, gs_starts
151 | 
152 | 
153 |             # Now try to match up named entities that end here
154 | 
155 |             if gs_ends and pred_ends: # GS and prediction contain a named entity that ends in the same place
156 | 
157 |                 #If both named entities start at the same place and are of the same type
158 |                 if curr_gs_start == curr_pred_start and curr_gs_type == curr_pred_type:
159 |                     # Count true positives
160 |                     self.tp += 1
161 |                     self.class_counts[curr_pred_type].tp += 1
162 |                 else: #span matches, but label doesn't match: count both a true positive and a false negative
163 |                     self.fp += 1
164 |                     self.fn += 1
165 |                     self.class_counts[curr_pred_type].fp += 1
166 |                     self.class_counts[curr_gs_type].fn += 1
167 |             elif gs_ends: #Didn't find the named entity in the gold standard, count false negative
168 |                 self.fn += 1
169 |                 self.class_counts[curr_gs_type].fn += 1
170 |             elif pred_ends: #Named entity in the prediction doesn't match one int he gold_standard, count false positive
171 |                 self.fp += 1
172 |                 self.class_counts[curr_pred_type].fp += 1
173 |             elif curr_pred_type==None and curr_pred_type==None: #matching O tag or end of sentence, count true negative
174 |                 self.tn += 1
175 |                 for c in self.ne_classes:
176 |                     self.class_counts[c].tn += 1
177 | 
178 |             # Remember that we are no longer in a named entity
179 |             if gs_ends:
180 |                 curr_gs_type = None
181 |             if pred_ends:
182 |                 curr_pred_type = None
183 | 
184 |             # If a named entity starts here, remember it's type and this position
185 |             if gs_starts:
186 |                 curr_gs_start = total
187 |                 curr_gs_type = gs_type
188 |             if pred_start:
189 |                 curr_pred_start = total
190 |                 curr_pred_type = pred_type
191 |             total += 1
192 | 
193 |     def print_scores(self):
194 |         """
195 |         Output a table with accuracy, precision, recall and F1 score. 
196 |         """
197 | 
198 |         print "Found %i GENEs. Expected %i GENEs; Correct: %i.\n" % (self.tp + self.fp, self.tp + self.fn, self.tp)
199 | 
200 | 
201 |         if self.tp + self.tn + self.fp + self.fn == 0: # There was nothing to do.
202 |             acc = 1
203 |         else:
204 |             acc = (self.tp + self.tn) / float(self.tp + self.tn + self.fp + self.fn)
205 | 
206 |         if self.tp+self.fp == 0:   # Prediction didn't annotate any NEs
207 |             prec = 1
208 |             
209 |         else:
210 |             prec = self.tp / float(self.tp + self.fp)
211 |             
212 | 
213 |         if self.tp+self.fn == 0: # Prediction marked everything as a NE of the wrong type.
214 |             rec = 1
215 |         else:
216 |             rec = self.tp / float(self.tp + self.fn)
217 | 
218 |         print "\t precision \trecall \t\tF1-Score"
219 |         fscore = (2*prec*rec)/(prec+rec)
220 |         #print "Total:\t %f\t%f\t%f" % (prec, rec, fscore)
221 |         for c in self.ne_classes:
222 |             c_tp = self.class_counts[c].tp
223 |             c_tn = self.class_counts[c].tn
224 |             c_fp = self.class_counts[c].fp
225 |             c_fn = self.class_counts[c].fn
226 |             #print c
227 |             #print c_tp
228 |             #print c_tn
229 |             #print c_fp
230 |             #print c_fn
231 |             if (c_tp + c_tn + c_fp + c_fn) == 0:                
232 |                 c_acc = 1
233 |             else:
234 |                 c_acc = (c_tp + c_tn) / float(c_tp + c_tn + c_fp + c_fn)
235 |             
236 |             if (c_tp + c_fn) == 0:
237 |                 sys.stderr.write("Warning: no instances for entity type %s in gold standard.\n" % c)
238 |                 c_rec = 1
239 |             else:
240 |                 c_rec = c_tp / float(c_tp + c_fn)
241 |             if (c_tp + c_fp) == 0:
242 |                 sys.stderr.write("Warning: prediction file does not contain any instances of entity type %s.\n" % c)
243 |                 c_prec =1
244 |             else:
245 |                 c_prec = c_tp / float(c_tp + c_fp)
246 | 
247 |             if c_prec + c_rec == 0:
248 |                 fscore = 0
249 |             else:    
250 |                 fscore = (2*c_prec * c_rec)/(c_prec + c_rec)
251 |             print "%s:\t %f\t%f\t%f" % (c, c_prec, c_rec, fscore)
252 | 
253 | 
def usage():
    """Write the command line synopsis of this script to stderr."""
    help_lines = [
        "",
        "    Usage: python eval_gene_tagger.py [key_file] [prediction_file]",
        "        Evaluate the gene-tagger output in prediction_file against",
        "        the gold standard in key_file. Output accuracy, precision,",
        "        recall and F1-Score.",
        "",
    ]
    # The empty first and last entries give the leading and trailing newline.
    sys.stderr.write("\n".join(help_lines))
260 | 
if __name__ == "__main__":
    # Command line entry point: eval_gene_tagger.py [key_file] [prediction_file]

    if len(sys.argv)!=3:
        usage()
        sys.exit(1)

    # open() instead of the Python-2-only file() builtin; the with-blocks
    # close both files even when the evaluation aborts via sys.exit().
    with open(sys.argv[1]) as key_file:
        with open(sys.argv[2]) as prediction_file:
            gs_iterator = corpus_iterator(key_file)
            pred_iterator = corpus_iterator(prediction_file, with_logprob = False)
            evaluator = Evaluator()
            evaluator.compare(gs_iterator, pred_iterator)
            evaluator.print_scores()
271 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.curie.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * jQuery CURIE @VERSION
  3 |  *
  4 |  * Copyright (c) 2008,2009 Jeni Tennison
  5 |  * Licensed under the MIT (MIT-LICENSE.txt)
  6 |  *
  7 |  * Depends:
  8 |  *  jquery.uri.js
  9 |  *  jquery.xmlns.js
 10 |  */
 11 | 
 12 | /**
 13 |  * @fileOverview jQuery CURIE handling
 14 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
 15 |  * @copyright (c) 2008,2009 Jeni Tennison
 16 |  * @license MIT license (MIT-LICENSE.txt)
 17 |  * @version 1.0
 18 |  * @requires jquery.uri.js
 19 |  * @requires jquery.xmlns.js
 20 |  */
 21 | (function ($) {
 22 | 
 23 |    /**
 24 |     * Creates a {@link jQuery.uri} object by parsing a CURIE.
 25 |     * @methodOf jQuery
 26 |     * @param {String} curie The CURIE to be parsed
 27 |     * @param {String} uri The URI string to be converted to a CURIE.
 28 |     * @param {Object} [options] CURIE parsing options
 29 |     * @param {string} [options.reservedNamespace='http://www.w3.org/1999/xhtml/vocab#'] The namespace to apply to a CURIE that has no prefix and either starts with a colon or is in the list of reserved local names
 30 |     * @param {string} [options.defaultNamespace]  The namespace to apply to a CURIE with no prefix which is not mapped to the reserved namespace by the rules given above.
 31 |     * @param {Object} [options.namespaces] A map of namespace bindings used to map CURIE prefixes to URIs.
 32 |     * @param {string[]} [options.reserved=['alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright', 'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next', 'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up']] A list of local names that will always be mapped to the URI specified by reservedNamespace.
 33 |     * @param {string} [options.charcase='lower'] Specifies whether the curie's case is altered before it's interpreted. Acceptable values are:
 34 |     * <dl>
 35 |     * <dt>lower</dt><dd>Force the CURIE string to lower case.</dd>
 36 |     * <dt>upper</dt><dd>Force the CURIE string to upper case.</dd>
 37 |     * <dt>preserve</dt><dd>Preserve the original case of the CURIE. Note that this might not be possible if the CURIE has been taken from an HTML attribute value because of the case conversions performed automatically by browsers. For this reason, it's a good idea to avoid mixed-case CURIEs within RDFa.</dd>
 38 |     * </dl>
 39 |     * @returns {jQuery.uri} A new {@link jQuery.uri} object representing the full absolute URI specified by the CURIE.
 40 |     */
 41 |   $.curie = function (curie, options) {
 42 |     var
 43 |       opts = $.extend({}, $.curie.defaults, options || {}),
 44 |       m = /^(([^:]*):)?(.+)$/.exec(curie),
 45 |       prefix = m[2],
 46 |       local = m[3],
 47 |       ns = opts.namespaces[prefix];
 48 |     if (/^:.+/.test(curie)) { // This is the case of a CURIE like ":test"
 49 |       if (opts.reservedNamespace === undefined || opts.reservedNamespace === null) {
 50 |         throw "Malformed CURIE: No prefix and no default namespace for unprefixed CURIE " + curie;
 51 |       } else {
 52 |         ns = opts.reservedNamespace;
 53 |       }
 54 |     } else if (prefix) {
 55 |       if (ns === undefined) {
 56 |         throw "Malformed CURIE: No namespace binding for " + prefix + " in CURIE " + curie;
 57 |       }
 58 |     } else {
 59 |       if (opts.charcase === 'lower') {
 60 |         curie = curie.toLowerCase();
 61 |       } else if (opts.charcase === 'upper') {
 62 |         curie = curie.toUpperCase();
 63 |       }
 64 |       if (opts.reserved.length && $.inArray(curie, opts.reserved) >= 0) {
 65 |         ns = opts.reservedNamespace;
 66 |         local = curie;
 67 |       } else if (opts.defaultNamespace === undefined || opts.defaultNamespace === null) {
 68 |         // the default namespace is provided by the application; it's not clear whether
 69 |         // the default XML namespace should be used if there's a colon but no prefix
 70 |         throw "Malformed CURIE: No prefix and no default namespace for unprefixed CURIE " + curie;
 71 |       } else {
 72 |         ns = opts.defaultNamespace;
 73 |       }
 74 |     }
 75 |     return $.uri(ns + local);
 76 |   };
 77 | 
  /**
   * Default options used by {@link jQuery.curie} and {@link jQuery.createCurie};
   * options passed by the caller are merged on top of these. By default there
   * are no namespace bindings, no reserved local names, neither a reserved nor
   * a default namespace, and the case of the CURIE is preserved.
   */
  $.curie.defaults = {
    namespaces: {},
    reserved: [],
    reservedNamespace: undefined,
    defaultNamespace: undefined,
    charcase: 'preserve'
  };
 85 | 
 86 |    /**
 87 |     * Creates a {@link jQuery.uri} object by parsing a safe CURIE string (a CURIE
 88 |     * contained within square brackets). If the input safeCurie string does not
 89 |     * start with '[' and end with ']', the entire string content will be interpreted
 90 |     * as a URI string.
 91 |     * @methodOf jQuery
 92 |     * @param {String} safeCurie The safe CURIE string to be parsed.
 93 |     * @param {Object} [options] CURIE parsing options
 94 |     * @param {string} [options.reservedNamespace='http://www.w3.org/1999/xhtml/vocab#'] The namespace to apply to a CURIE that has no prefix and either starts with a colon or is in the list of reserved local names
 95 |     * @param {string} [options.defaultNamespace]  The namespace to apply to a CURIE with no prefix which is not mapped to the reserved namespace by the rules given above.
 96 |     * @param {Object} [options.namespaces] A map of namespace bindings used to map CURIE prefixes to URIs.
 97 |     * @param {string[]} [options.reserved=['alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright',
 98 |       'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next',
 99 |       'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up']]
100 |                         A list of local names that will always be mapped to the URI specified by reservedNamespace.
101 |     * @param {string} [options.charcase='lower'] Specifies whether the curie's case is altered before it's interpreted. Acceptable values are:
102 |     * <dl>
103 |     * <dt>lower</dt><dd>Force the CURIE string to lower case.</dd>
104 |     * <dt>upper</dt><dd>Force the CURIE string to upper case.</dd>
105 |     * <dt>preserve</dt><dd>Preserve the original case of the CURIE. Note that this might not be possible if the CURIE has been taken from an HTML attribute value because of the case conversions performed automatically by browsers. For this reason, it's a good idea to avoid mixed-case CURIEs within RDFa.</dd>
106 |     * </dl>
107 |     * @returns {jQuery.uri} A new {@link jQuery.uri} object representing the full absolute URI specified by the CURIE.
108 |     */
109 |   $.safeCurie = function (safeCurie, options) {
110 |     var m = /^\[([^\]]+)\]$/.exec(safeCurie);
111 |     return m ? $.curie(m[1], options) : $.uri(safeCurie);
112 |   };
113 | 
114 |    /**
115 |     * Creates a CURIE string from a URI string.
116 |     * @methodOf jQuery
117 |     * @param {String} uri The URI string to be converted to a CURIE.
118 |     * @param {Object} [options] CURIE parsing options
119 |     * @param {string} [options.reservedNamespace='http://www.w3.org/1999/xhtml/vocab#']
120 |     *        If the input URI starts with this value, the generated CURIE will
121 |     *        have no namespace prefix and will start with a colon character (:),
122 |     *        unless the local part of the CURIE is one of the reserved names specified
123 |     *        by the reservedNames option (see below), in which case the generated
124 |     *        CURIE will have no namespace prefix and will not start with a colon
125 |     *        character.
126 |     * @param {string} [options.defaultNamespace]  If the input URI starts with this value, the generated CURIE will have no namespace prefix and will not start with a colon.
127 |     * @param {Object} [options.namespaces] A map of namespace bindings used to map CURIE prefixes to URIs.
128 |     * @param {string[]} [options.reserved=['alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright',
129 |       'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next',
130 |       'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up']]
131 |                         A list of local names that will always be mapped to the URI specified by reservedNamespace.
132 |     * @param {string} [options.charcase='lower'] Specifies the case normalisation done to the CURIE. Acceptable values are:
133 |     * <dl>
134 |     * <dt>lower</dt><dd>Normalise the CURIE to lower case.</dd>
135 |     * <dt>upper</dt><dd>Normalise the CURIE to upper case.</dd>
136 |     * <dt>preserve</dt><dd>Preserve the original case of the CURIE. Note that this might not be possible if the CURIE has been taken from an HTML attribute value because of the case conversions performed automatically by browsers. For this reason, it's a good idea to avoid mixed-case CURIEs within RDFa.</dd>
137 |     * </dl>
138 |     * @returns {jQuery.uri} A new {@link jQuery.uri} object representing the full absolute URI specified by the CURIE.
139 |     */
140 |   $.createCurie = function (uri, options) {
141 |     var opts = $.extend({}, $.curie.defaults, options || {}),
142 |       ns = opts.namespaces,
143 |       curie;
144 |     uri = $.uri(uri).toString();
145 |     if (opts.reservedNamespace !== undefined && 
146 |         uri.substring(0, opts.reservedNamespace.toString().length) === opts.reservedNamespace.toString()) {
147 |       curie = uri.substring(opts.reservedNamespace.toString().length);
148 |       if ($.inArray(curie, opts.reserved) === -1) {
149 |         curie = ':' + curie;
150 |       }
151 |     } else {
152 |       $.each(ns, function (prefix, namespace) {
153 |         if (uri.substring(0, namespace.toString().length) === namespace.toString()) {
154 |           curie = prefix + ':' + uri.substring(namespace.toString().length);
155 |           return null;
156 |         }
157 |       });
158 |     }
159 |     if (curie === undefined) {
160 |       throw "No Namespace Binding: There's no appropriate namespace binding for generating a CURIE from " + uri;
161 |     } else {
162 |       return curie;
163 |     }
164 |   };
165 | 
166 |    /**
167 |     * Creates a {@link jQuery.uri} object by parsing the specified
168 |     * CURIE string in the context of the namespaces defined by the
169 |     * jQuery selection.
170 |     * @methodOf jQuery#
171 |     * @name jQuery#curie
172 |     * @param {String} curie The CURIE string to be parsed
173 |     * @param {Object} options The CURIE parsing options.
174 |     *        See {@link jQuery.curie} for details of the supported options.
175 |     *        The namespace declarations declared on the current jQuery
176 |     *        selection (and inherited from any ancestor elements) will automatically
177 |     *        be included in the options.namespaces property.
178 |     * @returns {jQuery.uri}
179 |     * @see jQuery.curie
180 |     */
181 |   $.fn.curie = function (curie, options) {
182 |     var opts = $.extend({}, $.fn.curie.defaults, { namespaces: this.xmlns() }, options || {});
183 |     return $.curie(curie, opts);
184 |   };
185 | 
186 |    /**
187 |     * Creates a {@link jQuery.uri} object by parsing the specified
188 |     * safe CURIE string in the context of the namespaces defined by
189 |     * the jQuery selection.
190 |     *
191 |     * @methodOf jQuery#
192 |     * @name jQuery#safeCurie
193 |     * @param {String} safeCurie The safe CURIE string to be parsed. See {@link jQuery.safeCurie} for details on how safe CURIE strings are processed.
194 |     * @param {Object} options   The CURIE parsing options.
195 |     *        See {@link jQuery.safeCurie} for details of the supported options.
196 |     *        The namespace declarations declared on the current jQuery
197 |     *        selection (and inherited from any ancestor elements) will automatically
198 |     *        be included in the options.namespaces property.
199 |     * @returns {jQuery.uri}
200 |     * @see jQuery.safeCurie
201 |     */
202 |   $.fn.safeCurie = function (safeCurie, options) {
203 |     var opts = $.extend({}, $.fn.curie.defaults, { namespaces: this.xmlns() }, options || {});
204 |     return $.safeCurie(safeCurie, opts);
205 |   };
206 | 
207 |    /**
208 |     * Creates a CURIE string from a URI string using the namespace
209 |     * bindings in the context of the current jQuery selection.
210 |     *
211 |     * @methodOf jQuery#
212 |     * @name jQuery#createCurie
213 |     * @param {String|jQuery.uri} uri The URI string to be converted to a CURIE
214 |     * @param {Object} options the CURIE parsing options.
215 |     *        See {@link jQuery.createCurie} for details of the supported options.
216 |     *        The namespace declarations declared on the current jQuery
217 |     *        selection (and inherited from any ancestor elements) will automatically
218 |     *        be included in the options.namespaces property.
219 |     * @returns {String}
220 |     * @see jQuery.createCurie
221 |     */
222 |   $.fn.createCurie = function (uri, options) {
223 |     var opts = $.extend({}, $.fn.curie.defaults, { namespaces: this.xmlns() }, options || {});
224 |     return $.createCurie(uri, opts);
225 |   };
226 | 
  /**
   * Default options shared by {@link jQuery#curie}, {@link jQuery#safeCurie}
   * and {@link jQuery#createCurie}: a list of reserved local names, the
   * namespace those names are mapped to, no default namespace, and the
   * 'lower' charcase setting. The namespaces option is not part of the
   * defaults; those methods take it from the current selection.
   */
  $.fn.curie.defaults = {
    reserved: [
      'alternate', 'appendix', 'bookmark', 'cite', 'chapter', 'contents', 'copyright',
      'first', 'glossary', 'help', 'icon', 'index', 'last', 'license', 'meta', 'next',
      'p3pv1', 'prev', 'role', 'section', 'stylesheet', 'subsection', 'start', 'top', 'up'
    ],
    reservedNamespace: 'http://www.w3.org/1999/xhtml/vocab#',
    defaultNamespace: undefined,
    charcase: 'lower'
  };
237 | 
238 | })(jQuery);
239 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.datatype.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * jQuery CURIE @VERSION
  3 |  *
  4 |  * Copyright (c) 2008,2009 Jeni Tennison
  5 |  * Licensed under the MIT (MIT-LICENSE.txt)
  6 |  *
  7 |  * Depends:
  8 |  *  jquery.uri.js
  9 |  */
 10 | /**
 11 |  * @fileOverview XML Schema datatype handling
 12 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
 13 |  * @copyright (c) 2008,2009 Jeni Tennison
 14 |  * @license MIT license (MIT-LICENSE.txt)
 15 |  * @version 1.0
 16 |  * @requires jquery.uri.js
 17 |  */
 18 | 
 19 | (function ($) {
 20 | 
 21 |   var strip = function (value) {
 22 |     return value.replace(/[ \t\n\r]+/, ' ').replace(/^ +/, '').replace(/ +$/, '');
 23 |   };
 24 | 
 25 |   /**
 26 |    * Creates a new jQuery.typedValue object. This should be invoked as a method
 27 |    * rather than constructed using new.
 28 |    * @class Represents a value with an XML Schema datatype
 29 |    * @param {String} value The string representation of the value
 30 |    * @param {String} datatype The XML Schema datatype URI
 31 |    * @returns {jQuery.typedValue}
 32 |    * @example intValue = jQuery.typedValue('42', 'http://www.w3.org/2001/XMLSchema#integer');
 33 |    */
 34 |   $.typedValue = function (value, datatype) {
 35 |     return $.typedValue.fn.init(value, datatype);
 36 |   };
 37 | 
 38 |   $.typedValue.fn = $.typedValue.prototype = {
 39 |     /**
 40 |      * The string representation of the value
 41 |      * @memberOf jQuery.typedValue#
 42 |      */
 43 |     representation: undefined,
 44 |     /**
 45 |      * The value as an object. The type of the object will
 46 |      * depend on the XML Schema datatype URI specified
 47 |      * in the constructor. The following table lists the mappings
 48 |      * currently supported:
 49 |      * <table>
 50 |      *   <tr>
 51 |      *   <th>XML Schema Datatype</th>
 52 |      *   <th>Value type</th>
 53 |      *   </tr>
 54 |      *   <tr>
 55 |      *     <td>http://www.w3.org/2001/XMLSchema#string</td>
 56 |      *     <td>string</td>
 57 |      *   </tr>
 58 |      *   <tr>
 59 |      *     <td>http://www.w3.org/2001/XMLSchema#token</td>
 60 |      *     <td>string</td>
 61 |      *   </tr>
 62 |      *   <tr>
 63 |      *     <td>http://www.w3.org/2001/XMLSchema#NCName</td>
 64 |      *     <td>string</td>
 65 |      *   </tr>
 66 |      *   <tr>
 67 |      *     <td>http://www.w3.org/2001/XMLSchema#boolean</td>
 68 |      *     <td>bool</td>
 69 |      *   </tr>
 70 |      *   <tr>
 71 |      *     <td>http://www.w3.org/2001/XMLSchema#decimal</td>
 72 |      *     <td>string</td>
 73 |      *   </tr>
 74 |      *   <tr>
 75 |      *     <td>http://www.w3.org/2001/XMLSchema#integer</td>
 76 |      *     <td>int</td>
 77 |      *   </tr>
 78 |      *   <tr>
 79 |      *     <td>http://www.w3.org/2001/XMLSchema#int</td>
 80 |      *     <td>int</td>
 81 |      *   </tr>
 82 |      *   <tr>
 83 |      *     <td>http://www.w3.org/2001/XMLSchema#float</td>
 84 |      *     <td>float</td>
 85 |      *   </tr>
 86 |      *   <tr>
 87 |      *     <td>http://www.w3.org/2001/XMLSchema#double</td>
 88 |      *     <td>float</td>
 89 |      *   </tr>
 90 |      *   <tr>
 91 |      *     <td>http://www.w3.org/2001/XMLSchema#dateTime</td>
 92 |      *     <td>string</td>
 93 |      *   </tr>
 94 |      *   <tr>
 95 |      *     <td>http://www.w3.org/2001/XMLSchema#date</td>
 96 |      *     <td>string</td>
 97 |      *   </tr>
 98 |      *   <tr>
 99 |      *     <td>http://www.w3.org/2001/XMLSchema#gYear</td>
100 |      *     <td>int</td>
101 |      *   </tr>
102 |      *   <tr>
103 |      *     <td>http://www.w3.org/2001/XMLSchema#gMonthDay</td>
104 |      *     <td>string</td>
105 |      *   </tr>
106 |      *   <tr>
107 |      *     <td>http://www.w3.org/2001/XMLSchema#anyURI</td>
108 |      *     <td>{@link jQuery.uri}</td>
109 |      *   </tr>
110 |      * </table>
111 |      * @memberOf jQuery.typedValue#
112 |      */
113 |     value: undefined,
114 |     /**
115 |      * The XML Schema datatype URI for the value's datatype
116 |      * @memberOf jQuery.typedValue#
117 |      */
118 |     datatype: undefined,
119 | 
120 |     init: function (value, datatype) {
121 |       var d = $.typedValue.types[datatype];
122 |       if ($.typedValue.valid(value, datatype)) {
123 |         this.representation = value;
124 |         this.datatype = datatype;
125 |         this.value = d === undefined ? strip(value) : d.value(d.strip ? strip(value) : value);
126 |         return this;
127 |       } else {
128 |         throw {
129 |           name: 'InvalidValue',
130 |           message: value + ' is not a valid ' + datatype + ' value'
131 |         };
132 |       }
133 |     }
134 |   };
135 | 
  // Make init's prototype the shared $.typedValue.fn object, so that any object
  // constructed with init inherits the members defined on $.typedValue.fn.
  $.typedValue.fn.init.prototype = $.typedValue.fn;
137 | 
  /**
   * An object that holds the datatypes supported by the script. The properties
   * of this object are the URIs of the datatypes, and each datatype has up to
   * four properties:
   * <dl>
   *   <dt>strip</dt>
   *   <dd>A boolean value that indicates whether whitespace should be stripped from the value prior to testing against the regular expression or passing to the value function.</dd>
   *   <dt>regex</dt>
   *   <dd>A regular expression that valid values of the type must match.</dd>
   *   <dt>validate</dt>
   *   <dd>Optional. A function that performs further testing on the value.</dd>
   *   <dt>value</dt>
   *   <dd>A function that returns a Javascript object equivalent for the value.</dd>
   * </dl>
   * You can add to this object as necessary for your own datatypes, and
   * {@link jQuery.typedValue} and {@link jQuery.typedValue.valid} will work with them.
   * @see jQuery.typedValue
   * @see jQuery.typedValue.valid
   */
  $.typedValue.types = {};
155 | 
156 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#string'] = {
157 |     regex: /^.*$/,
158 |     strip: false,
159 |     /** @ignore */
160 |     value: function (v) {
161 |       return v;
162 |     }
163 |   };
164 | 
165 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#token'] = {
166 |     regex: /^.*$/,
167 |     strip: true,
168 |     /** @ignore */
169 |     value: function (v) {
170 |       return strip(v);
171 |     }
172 |   };
173 | 
174 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#NCName'] = {
175 |     regex: /^[a-z_][-\.a-z0-9]+$/i,
176 |     strip: true,
177 |     /** @ignore */
178 |     value: function (v) {
179 |       return strip(v);
180 |     }
181 |   };
182 | 
183 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#boolean'] = {
184 |     regex: /^(?:true|false|1|0)$/,
185 |     strip: true,
186 |     /** @ignore */
187 |     value: function (v) {
188 |       return v === 'true' || v === '1';
189 |     }
190 |   };
191 | 
192 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#decimal'] = {
193 |     regex: /^[\-\+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)$/,
194 |     strip: true,
195 |     /** @ignore */
196 |     value: function (v) {
197 |       v = v.replace(/^0+/, '')
198 |         .replace(/0+$/, '');
199 |       if (v === '') {
200 |         v = '0.0';
201 |       }
202 |       if (v.substring(0, 1) === '.') {
203 |         v = '0' + v;
204 |       }
205 |       if (/\.$/.test(v)) {
206 |         v = v + '0';
207 |       } else if (!/\./.test(v)) {
208 |         v = v + '.0';
209 |       }
210 |       return v;
211 |     }
212 |   };
213 | 
214 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#integer'] = {
215 |     regex: /^[\-\+]?[0-9]+$/,
216 |     strip: true,
217 |     /** @ignore */
218 |     value: function (v) {
219 |       return parseInt(v, 10);
220 |     }
221 |   };
222 | 
223 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#int'] = {
224 |     regex: /^[\-\+]?[0-9]+$/,
225 |     strip: true,
226 |     /** @ignore */
227 |     value: function (v) {
228 |       return parseInt(v, 10);
229 |     }
230 |   };
231 | 
232 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#float'] = {
233 |     regex: /^(?:[\-\+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:[eE][\-\+]?[0-9]+)?|[\-\+]?INF|NaN)$/,
234 |     strip: true,
235 |     /** @ignore */
236 |     value: function (v) {
237 |       if (v === '-INF') {
238 |         return -1 / 0;
239 |       } else if (v === 'INF' || v === '+INF') {
240 |         return 1 / 0;
241 |       } else {
242 |         return parseFloat(v);
243 |       }
244 |     }
245 |   };
246 | 
  // xsd:double reuses the regular expression and the value function of the
  // xsd:float entry, so both datatypes accept and produce the same things.
  $.typedValue.types['http://www.w3.org/2001/XMLSchema#double'] = {
    regex: $.typedValue.types['http://www.w3.org/2001/XMLSchema#float'].regex,
    strip: true,
    value: $.typedValue.types['http://www.w3.org/2001/XMLSchema#float'].value
  };
252 | 
253 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#duration'] = {
254 |     regex: /^([\-\+])?P(?:([0-9]+)Y)?(?:([0-9]+)M)?(?:([0-9]+)D)?(?:T(?:([0-9]+)H)?(?:([0-9]+)M)?(?:([0-9]+(?:\.[0-9]+)?)?S)?)$/,
255 |     /** @ignore */
256 |     validate: function (v) {
257 |       var m = this.regex.exec(v);
258 |       return m[2] || m[3] || m[4] || m[5] || m[6] || m[7];
259 |     },
260 |     strip: true,
261 |     /** @ignore */
262 |     value: function (v) {
263 |       return v;
264 |     }
265 |   };
266 | 
267 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#yearMonthDuration'] = {
268 |     regex: /^([\-\+])?P(?:([0-9]+)Y)?(?:([0-9]+)M)?$/,
269 |     /** @ignore */
270 |     validate: function (v) {
271 |       var m = this.regex.exec(v);
272 |       return m[2] || m[3];
273 |     },
274 |     strip: true,
275 |     /** @ignore */
276 |     value: function (v) {
277 |       var m = this.regex.exec(v),
278 |         years = m[2] || 0,
279 |         months = m[3] || 0;
280 |       months += years * 12;
281 |       return m[1] === '-' ? -1 * months : months;
282 |     }
283 |   };
284 | 
285 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#dateTime'] = {
286 |     regex: /^(-?[0-9]{4,})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):(([0-9]{2})(\.([0-9]+))?)((?:[\-\+]([0-9]{2}):([0-9]{2}))|Z)?$/,
287 |     /** @ignore */
288 |     validate: function (v) {
289 |       var
290 |         m = this.regex.exec(v),
291 |         year = parseInt(m[1], 10),
292 |         tz = m[10] === undefined || m[10] === 'Z' ? '+0000' : m[10].replace(/:/, ''),
293 |         date;
294 |       if (year === 0 ||
295 |           parseInt(tz, 10) < -1400 || parseInt(tz, 10) > 1400) {
296 |         return false;
297 |       }
298 |       try {
299 |         year = year < 100 ? Math.abs(year) + 1000 : year;
300 |         month = parseInt(m[2], 10);
301 |         day = parseInt(m[3], 10);
302 |         if (day > 31) {
303 |           return false;
304 |         } else if (day > 30 && !(month === 1 || month === 3 || month === 5 || month === 7 || month === 8 || month === 10 || month === 12)) {
305 |           return false;
306 |         } else if (month === 2) {
307 |           if (day > 29) {
308 |             return false;
309 |           } else if (day === 29 && (year % 4 !== 0 || (year % 100 === 0 && year % 400 !== 0))) {
310 |             return false;
311 |           }
312 |         }
313 |         date = '' + year + '/' + m[2] + '/' + m[3] + ' ' + m[4] + ':' + m[5] + ':' + m[7] + ' ' + tz;
314 |         date = new Date(date);
315 |         return true;
316 |       } catch (e) {
317 |         return false;
318 |       }
319 |     },
320 |     strip: true,
321 |     /** @ignore */
322 |     value: function (v) {
323 |       return v;
324 |     }
325 |   };
326 | 
327 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#date'] = {
328 |     regex: /^(-?[0-9]{4,})-([0-9]{2})-([0-9]{2})((?:[\-\+]([0-9]{2}):([0-9]{2}))|Z)?$/,
329 |     /** @ignore */
330 |     validate: function (v) {
331 |       var
332 |         m = this.regex.exec(v),
333 |         year = parseInt(m[1], 10),
334 |         month = parseInt(m[2], 10),
335 |         day = parseInt(m[3], 10),
336 |         tz = m[10] === undefined || m[10] === 'Z' ? '+0000' : m[10].replace(/:/, '');
337 |       if (year === 0 ||
338 |           month > 12 ||
339 |           day > 31 ||
340 |           parseInt(tz, 10) < -1400 || parseInt(tz, 10) > 1400) {
341 |         return false;
342 |       } else {
343 |         return true;
344 |       }
345 |     },
346 |     strip: true,
347 |     /** @ignore */
348 |     value: function (v) {
349 |       return v;
350 |     }
351 |   };
352 | 
353 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#gYear'] = {
354 |     regex: /^-?([0-9]{4,})$/,
355 |     /** @ignore */
356 |     validate: function (v) {
357 |       var i = parseInt(v, 10);
358 |       return i !== 0;
359 |     },
360 |     strip: true,
361 |     /** @ignore */
362 |     value: function (v) {
363 |       return parseInt(v, 10);
364 |     }
365 |   };
366 | 
367 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#gMonthDay'] = {
368 |     regex: /^--([0-9]{2})-([0-9]{2})((?:[\-\+]([0-9]{2}):([0-9]{2}))|Z)?$/,
369 |     /** @ignore */
370 |     validate: function (v) {
371 |       var
372 |         m = this.regex.exec(v),
373 |         month = parseInt(m[1], 10),
374 |         day = parseInt(m[2], 10),
375 |         tz = m[3] === undefined || m[3] === 'Z' ? '+0000' : m[3].replace(/:/, '');
376 |       if (month > 12 ||
377 |           day > 31 ||
378 |           parseInt(tz, 10) < -1400 || parseInt(tz, 10) > 1400) {
379 |         return false;
380 |       } else if (month === 2 && day > 29) {
381 |         return false;
382 |       } else if ((month === 4 || month === 6 || month === 9 || month === 11) && day > 30) {
383 |         return false;
384 |       } else {
385 |         return true;
386 |       }
387 |     },
388 |     strip: true,
389 |     /** @ignore */
390 |     value: function (v) {
391 |       return v;
392 |     }
393 |   };
394 | 
395 |   $.typedValue.types['http://www.w3.org/2001/XMLSchema#anyURI'] = {
396 |     regex: /^.*$/,
397 |     strip: true,
398 |     /** @ignore */
399 |     value: function (v, options) {
400 |       var opts = $.extend({}, $.typedValue.defaults, options);
401 |       return $.uri.resolve(v, opts.base);
402 |     }
403 |   };
404 | 
405 |   $.typedValue.defaults = {
406 |     base: $.uri.base(),
407 |     namespaces: {}
408 |   };
409 | 
410 |   /**
411 |    * Checks whether a value is valid according to a given datatype. The datatype must be held in the {@link jQuery.typedValue.types} object.
412 |    * @param {String} value The value to validate.
413 |    * @param {String} datatype The URI for the datatype against which the value will be validated.
414 |    * @returns {boolean} True if the value is valid or the datatype is not recognised.
415 |    * @example validDate = $.typedValue.valid(date, 'http://www.w3.org/2001/XMLSchema#date');
416 |    */
417 |   $.typedValue.valid = function (value, datatype) {
418 |     var d = $.typedValue.types[datatype];
419 |     if (d === undefined) {
420 |       return true;
421 |     } else {
422 |       value = d.strip ? strip(value) : value;
423 |       if (d.regex.test(value)) {
424 |         return d.validate === undefined ? true : d.validate(value);
425 |       } else {
426 |         return false;
427 |       }
428 |     }
429 |   };
430 | 
431 | })(jQuery);
432 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.rules.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * jQuery RDF Rules @VERSION
  3 |  * 
  4 |  * Copyright (c) 2008 Jeni Tennison
  5 |  * Licensed under the MIT (MIT-LICENSE.txt)
  6 |  *
  7 |  * Depends:
  8 |  *  jquery.uri.js
  9 |  *  jquery.xmlns.js
 10 |  *  jquery.datatype.js
 11 |  *  jquery.curie.js
 12 |  *  jquery.rdf.js
 13 |  */
 14 | /**
 15 |  * @fileOverview jQuery RDF Rules
 16 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
 17 |  * @copyright (c) 2008,2009 Jeni Tennison
 18 |  * @license MIT license (MIT-LICENSE.txt)
 19 |  * @version 1.0
 20 |  */
 21 | /**
 22 |  * @exports $ as jQuery
 23 |  */
 24 | /**
 25 |  * @ignore
 26 |  */
 27 | (function ($) {
 28 | 
 29 |   var
 30 |     blankNodeNum = 1;
 31 | 
 32 |   /**
 33 |    * <p>Creates a new jQuery.rdf.ruleset object. This should be invoked as a method rather than constructed using new.</p>
 34 |    * @class A jQuery.rdf.ruleset object represents a set of {@link jQuery.rdf.rule}s that can be run over a databank.
 35 |    * @param {jQuery.rdf.rule[]} [rules=[]] An array of rules with which the ruleset is initialised.
 36 |    * @param {Object} [options] Initialisation options for the ruleset.
 37 |    * @param {Object} [options.namespaces] An object representing a set of namespace bindings which are stored and used whenever a CURIE is used within a rule.
 38 |    * @param {String|jQuery.uri} [options.base] The base URI used to interpret any relative URIs used within the rules.
 39 |    * @returns {jQuery.rdf.ruleset}
 40 |    * @example rules = jQuery.rdf.ruleset();
 41 |    * @see jQuery.rdf.rule
 42 |    */
 43 |   $.rdf.ruleset = function (rules, options) {
 44 |     return new $.rdf.ruleset.fn.init(rules, options);
 45 |   };
 46 | 
 47 |   $.rdf.ruleset.fn = $.rdf.ruleset.prototype = {
 48 |     init: function (rules, options) {
 49 |       var i,
 50 |         opts = $.extend({}, $.rdf.ruleset.defaults, options);
 51 |       rules = rules || [];
 52 |       this.baseURI = opts.base;
 53 |       this.namespaces = $.extend({}, opts.namespaces);
 54 |       this.rules = [];
 55 |       for (i = 0; i < rules.length; i += 1) {
 56 |         this.add.apply(this, rules[i]);
 57 |       }
 58 |       return this;
 59 |     },
 60 |     
 61 |     /**
 62 |      * Sets or returns the base URI of the {@link jQuery.rdf.ruleset}.
 63 |      * @param {String|jQuery.uri} [base]
 64 |      * @returns A {@link jQuery.uri} if no base URI is specified, otherwise returns this {@link jQuery.rdf.ruleset} object.
 65 |      * @example 
 66 |      * rules = $.rdf.ruleset()
 67 |      *   .base('http://www.example.org/');
 68 |      */
 69 |     base: function (uri) {
 70 |       if (uri === undefined) {
 71 |         return this.baseURI;
 72 |       } else {
 73 |         this.baseURI = uri;
 74 |         return this;
 75 |       }
 76 |     },
 77 |     
 78 |     /**
 79 |      * Sets or returns a namespace binding on the {@link jQuery.rdf.ruleset}.
 80 |      * @param {String} [prefix]
 81 |      * @param {String} [namespace]
 82 |      * @returns {Object|jQuery.uri|jQuery.rdf} If no prefix or namespace is specified, returns an object providing all namespace bindings on the {@link jQuery.rdf.ruleset}. If a prefix is specified without a namespace, returns the {@link jQuery.uri} associated with that prefix. Otherwise returns this {@link jQuery.rdf} object after setting the namespace binding.
 83 |      */
 84 |     prefix: function (prefix, uri) {
 85 |       if (prefix === undefined) {
 86 |         return this.namespaces;
 87 |       } else if (uri === undefined) {
 88 |         return this.namespaces[prefix];
 89 |       } else {
 90 |         this.namespaces[prefix] = uri;
 91 |         return this;
 92 |       }
 93 |     },
 94 |     
 95 |     /**
 96 |      * Returns the number of rules in this ruleset.
 97 |      * @returns {Integer}
 98 |      */
 99 |     size: function () {
100 |       return this.rules.length;
101 |     },
102 |     
103 |     /**
104 |      * Adds a rule or set of rules to this ruleset.
105 |      * @param {String|Array|Function|jQuery.rdf.pattern|jQuery.rdf.rule|jQuery.rdf.ruleset} lhs A {@link jQuery.rdf.rule} will be added directly. If a {@link jQuery.rdf.ruleset} is provided then all its rules will be added to this one. Otherwise, specifies the left hand side of the rule to be added, as in {@link jQuery.rdf.rule}.
106 |      * @param {String|Function|jQuery.rdf.pattern} rhs The right hand side of the rule to be added.
107 |      * @returns {jQuery.rdf.ruleset} Returns this {@link jQuery.rdf.ruleset}
108 |      * @see jQuery.rdf.rule
109 |      * @example
110 |      * rules = $.rdf.ruleset()
111 |      *   .prefix('foaf', ns.foaf)
112 |      *   .add('?person a foaf:Person', '?person a foaf:Agent');
113 |      */
114 |     add: function (lhs, rhs) {
115 |       var rule;
116 |       if (rhs === undefined && lhs.rules) {
117 |         this.rules = this.rules.concat(lhs.rules);
118 |       } else {
119 |         if (rhs === undefined && lhs.lhs) {
120 |           rule = lhs;
121 |         } else {
122 |           rule = $.rdf.rule(lhs, rhs, { namespaces: this.prefix(), base: this.base() });
123 |         }
124 |         if ($.inArray(rule, this.rules) === -1) {
125 |           this.rules.push(rule);
126 |         }
127 |       }
128 |       return this;
129 |     },
130 |     
131 |     /**
132 |      * Runs the rules held in this ruleset on the data passed as the first argument.
133 |      * @param {jQuery.rdf.databank} data A databank containing data to be reasoned over and added to.
134 |      * @param {Object} [options]
135 |      * @param {Integer} [options.limit=50] The rules in this ruleset are generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the ruleset will be run before halting.
136 |      * @returns {jQuery.rdf.ruleset} Returns this ruleset.
137 |      * @example
138 |      * rules = $.rdf.ruleset()
139 |      *   .prefix('foaf', ns.foaf)
140 |      *   .add('?person a foaf:Person', '?person a foaf:Agent')
141 |      *   .run(data);
142 |      * @see jQuery.rdf#reason
143 |      * @see jQuery.rdf.databank#reason
144 |      */
145 |     run: function (data, options) {
146 |       var i, r, ntriples,
147 |         opts = $.extend({ limit: 50 }, options),
148 |         limit = opts.limit;
149 |       do {
150 |         ntriples = data.size();
151 |         for (i = 0; i < this.rules.length; i += 1) {
152 |           r = this.rules[i];
153 |           r.run(data);
154 |         }
155 |         limit -= 1;
156 |       } while (data.size() > ntriples && limit > 0);
157 |       return this;
158 |     }
159 |   };
160 |   
161 |   $.rdf.ruleset.fn.init.prototype = $.rdf.ruleset.fn;
162 |   
163 |   $.rdf.ruleset.defaults = {
164 |     base: $.uri.base(),
165 |     namespaces: {}
166 |   };
167 | 
168 | /* Rules */
169 | 
170 |   /**
171 |    * <p>Creates a new jQuery.rdf.rule object. This should be invoked as a method rather than constructed using new.</p>
172 |    * @class A jQuery.rdf.rule object represents a rule that can be run over a {@link jQuery.rdf.databank}.
173 |    * @param {Object[]} lhs The left-hand side of the rule. This can be an array containing multiple conditions, or a single condition on its own. Each condition is one of:
174 |    * <ul>
175 |    *   <li>A {@link jQuery.rdf.pattern} or a string which is interpreted as a {@link jQuery.rdf.pattern}, which is used to match triples as with the {@link jQuery.rdf#where} method.</li>
176 |    *   <li>A function which must return true for the rule to be satisfied. The arguments for the function are as described in the documentation for {@link jQuery.rdf#filter}.</li>
177 |    *   <li>An array of two items: a variable name and either a regular expression or a value that it matches against (as used in the two arguments to {@link jQuery.rdf#filter}).</li>
178 |    * </ul>
179 |    * @param {Function|String[]} rhs The right-hand side of the rule. This can be an array of strings which are interpreted as patterns and used to create new triples when the rule is fired. If the patterns contain references to blank nodes, new blank nodes are created each time the rule is fired. Alternatively, it can be a function which is executed when the rule is fired. The function needs to have the same signature as that used for {@link jQuery.rdf#map}.
180 |    * @param {Object} [options] Initialisation options for the rules.
181 |    * @param {Object} [options.namespaces] An object representing a set of namespace bindings which are stored and used whenever a CURIE is used within the left or right-hand sides of the rule.
182 |    * @param {String|jQuery.uri} [options.base] The base URI used to interpret any relative URIs used within the rule.
183 |    * @returns {jQuery.rdf.rule}
184 |    * @example $.rdf.rule('?person a foaf:Person', '?person a foaf:Agent', { namespaces: ns });
185 |    * @example
186 |    * var rule = $.rdf.rule(
187 |    *   ['?person a vcard:VCard',
188 |    *    '?person vcard:fn ?name'],
189 |    *   ['?person a foaf:Person',
190 |    *    '?person foaf:name ?name'],
191 |    *   { namespaces: ns }
192 |    * );
193 |    * @example
194 |    * var rule = $.rdf.rule(
195 |    *   ['?person a foaf:Person',
196 |    *    '?person foaf:firstName ?fn'],
197 |    *   ['?person a vcard:VCard',
198 |    *    '?person vcard:n _:name',
199 |    *    '_:name a vcard:Name', 
200 |    *    '_:name vcard:given-name ?fn'],
201 |    *   { namespaces: ns }
202 |    * );
203 |    * @example
204 |    * var rule = $.rdf.rule(
205 |    *   ['?person foaf:name ?name', 
206 |    *    ['name', /^J.+/]], 
207 |    *  function () { name = this.name }, 
208 |    *  { namespaces: ns });
209 |    * @see jQuery.rdf.rule
210 |    */
211 |   $.rdf.rule = function (lhs, rhs, options) {
212 |     return new $.rdf.rule.fn.init(lhs, rhs, options);
213 |   };
214 | 
215 |   $.rdf.rule.fn = $.rdf.rule.prototype = {
216 |     init: function (lhs, rhs, options) {
217 |       var opts = $.extend({}, $.rdf.rule.defaults, options),
218 |         lhsWildcards = [], rhsBlanks = false;
219 |       if (typeof lhs === 'string') {
220 |         lhs = [lhs];
221 |       }
222 |       if (typeof rhs === 'string') {
223 |         rhs = [rhs];
224 |       }
225 |       this.lhs = $.map(lhs, function (p) {
226 |         if ($.isArray(p)) {
227 |           return [p];
228 |         } else if ($.isFunction(p)) {
229 |           return p;
230 |         } else {
231 |           p = $.rdf.pattern(p, opts);
232 |           if (typeof p.subject === 'string') {
233 |             lhsWildcards.push(p.subject);
234 |           }
235 |           if (typeof p.property === 'string') {
236 |             lhsWildcards.push(p.property);
237 |           }
238 |           if (typeof p.object === 'string') {
239 |             lhsWildcards.push(p.object);
240 |           }
241 |           return p;
242 |         }
243 |       });
244 |       lhsWildcards = $.unique(lhsWildcards);
245 |       if ($.isFunction(rhs)) {
246 |         this.rhs = rhs;
247 |       } else {
248 |         this.rhs = $.map(rhs, function (p) {
249 |           p = $.rdf.pattern(p, opts);
250 |           if ((typeof p.subject === 'string' && $.inArray(p.subject, lhsWildcards) === -1) ||
251 |               (typeof p.property === 'string' && $.inArray(p.property, lhsWildcards) === -1) ||
252 |               (typeof p.object === 'string' && $.inArray(p.object, lhsWildcards) === -1)) {
253 |             throw "Bad Rule: Right-hand side of the rule contains a reference to an unbound wildcard";
254 |           } else if (p.subject.type === 'bnode' || p.property.type === 'bnode' || p.object.type === 'bnode') {
255 |             rhsBlanks = true;
256 |           }
257 |           return p;
258 |         });
259 |       }
260 |       this.rhsBlanks = rhsBlanks;
261 |       this.cache = {};
262 |       return this;
263 |     },
264 |     
265 |     /**
266 |      * Runs the rule on the data passed as the first argument.
267 |      * @param {jQuery.rdf.databank} data A databank containing data to be reasoned over and added to.
268 |      * @param {Object} [options]
269 |      * @param {Integer} [options.limit=50] The rule isArray generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the rule will be run before halting.
270 |      * @returns {jQuery.rdf.rule} Returns this rule.
271 |      * @example
272 |      * $.rdf.rule('?person a foaf:Person', '?person a foaf:Agent', { namespaces: ns })
273 |      *   .run(data);
274 |      * @see jQuery.rdf.ruleset#run
275 |      * @see jQuery.rdf#reason
276 |      * @see jQuery.rdf.databank#reason
277 |      */
278 |     run: function (data, options) {
279 |       var query = $.rdf({ databank: data }), 
280 |         condition,
281 |         opts = $.extend({ limit: 50 }, options), limit = opts.limit,
282 |         ntriples,
283 |         i, j, pattern, s, p, o, q,
284 |         blanks = this.rhsBlanks,
285 |         cache, sources, triples, add;
286 |       if (this.cache[data.id] === undefined) {
287 |         this.cache[data.id] = {};
288 |       }
289 |       for (i = 0; i < this.lhs.length; i += 1) {
290 |         condition = this.lhs[i];
291 |         if ($.isArray(condition)) {
292 |           query = query.filter.apply(query, condition);
293 |         } else if ($.isFunction(condition)) {
294 |           query = query.filter.call(query, condition);
295 |         } else {
296 |           query = query.where(this.lhs[i]);
297 |         }
298 |       }
299 |       do {
300 |         ntriples = query.length;
301 |         sources = query.sources();
302 |         for (i = 0; i < ntriples; i += 1) {
303 |           triples = sources[i];
304 |           add = true;
305 |           cache = this.cache[data.id];
306 |           for (j = 0; j < triples.length; j += 1) {
307 |             if (cache[triples[j]] === undefined) {
308 |               cache[triples[j]] = {};
309 |             } else if (j === triples.length - 1) {
310 |               add = false;
311 |             }
312 |             cache = cache[triples[j]];
313 |           }
314 |           if (add) {
315 |             q = query.eq(i);
316 |             if (blanks) {
317 |               for (j = 0; j < this.rhs.length; j += 1) {
318 |                 pattern = this.rhs[j];
319 |                 s = pattern.subject;
320 |                 p = pattern.property;
321 |                 o = pattern.object;
322 |                 if (s.type === 'bnode') {
323 |                   s = $.rdf.blank('' + s + blankNodeNum);
324 |                 }
325 |                 if (p.type === 'bnode') {
326 |                   p = $.rdf.blank('' + p + blankNodeNum);
327 |                 }
328 |                 if (o.type === 'bnode') {
329 |                   o = $.rdf.blank('' + o + blankNodeNum);
330 |                 }
331 |                 pattern = $.rdf.pattern(s, p, o);
332 |                 q.add(pattern);
333 |               }
334 |               blankNodeNum += 1;
335 |             } else if ($.isFunction(this.rhs)) {
336 |               q.map(this.rhs);
337 |             } else {
338 |               for (j = 0; j < this.rhs.length; j += 1) {
339 |                 q.add(this.rhs[j]);
340 |               }
341 |             }
342 |           }
343 |         }
344 |         limit -= 1;
345 |       } while (query.length > ntriples && limit > 0);
346 |       return this;
347 |     }
348 |   };
349 | 
350 |   $.rdf.rule.fn.init.prototype = $.rdf.rule.fn;
351 | 
352 |   $.rdf.rule.defaults = {
353 |     base: $.uri.base(),
354 |     namespaces: {}
355 |   };
356 | 
357 |   $.extend($.rdf.databank.fn, {
358 |     /**
359 |      * @methodOf jQuery.rdf.databank#
360 |      * @name jQuery.rdf.databank#reason
361 |      * @description Reasons over this databank using the {@link jQuery.rdf.rule} or {@link jQuery.rdf.ruleset} given as the first argument.
362 |      * @param {jQuery.rdf.rule|jQuery.rdf.ruleset} rules The rules to run over the databank.
363 |      * @param {Object} [options]
364 |      * @param {Integer} [options.limit=50] The rules in this ruleset are generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the ruleset will be run before halting.
365 |      * @returns {jQuery.rdf.databank} The original {@link jQuery.rdf.databank}, although it may now contain more triples.
366 |      * @see jQuery.rdf.ruleset#run
367 |      * @see jQuery.rdf.rule#run
368 |      */
369 |     reason: function (rule, options) {
370 |       rule.run(this, options);
371 |       return this;
372 |     }
373 |   });
374 |   
375 |   $.extend($.rdf.fn, {
376 |     /**
377 |      * @methodOf jQuery.rdf#
378 |      * @name jQuery.rdf#reason
379 |      * @description Reasons over the {@link jQuery.rdf#databank} associated with this {@link jQuery.rdf} object using the {@link jQuery.rdf.rule} or {@link jQuery.rdf.ruleset} given as the first argument.
380 |      * @param {jQuery.rdf.rule|jQuery.rdf.ruleset} rules The rules to run over the databank.
381 |      * @param {Object} [options]
382 |      * @param {Integer} [options.limit=50] The rules in this ruleset are generally run over the {@link jQuery.rdf.databank} until it stops growing. In some situations, notably when creating blank nodes, this can lead to an infinite loop. The limit option indicates the maximum number of times the ruleset will be run before halting.
383 |      * @returns {jQuery.rdf} The original {@link jQuery.rdf} object, although it may now contain more matches because of the new triples added to its underlying databank.
384 |      * @see jQuery.rdf.ruleset#run
385 |      * @see jQuery.rdf.rule#run
386 |      */
387 |     reason: function (rule, options) {
388 |       rule.run(this.databank, options);
389 |       return this;
390 |     }
391 |   });
392 | 
393 | })(jQuery);
394 | 


--------------------------------------------------------------------------------
/thirdparty/jquery.rdf.xml.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * jQuery RDF @VERSION
  3 |  *
  4 |  * Copyright (c) 2008,2009 Jeni Tennison
  5 |  * Licensed under the MIT (MIT-LICENSE.txt)
  6 |  *
  7 |  * Depends:
  8 |  *  jquery.uri.js
  9 |  *  jquery.xmlns.js
 10 |  *  jquery.datatype.js
 11 |  *  jquery.curie.js
 12 |  *  jquery.rdf.js
 13 |  *  jquery.rdf.json.js
 14 |  *  jquery.rdf.xml.js
 15 |  */
 16 | /**
 17 |  * @fileOverview jQuery RDF/XML parser
 18 |  * @author <a href="mailto:jeni@jenitennison.com">Jeni Tennison</a>
 19 |  * @copyright (c) 2008,2009 Jeni Tennison
 20 |  * @license MIT license (MIT-LICENSE.txt)
 21 |  * @version 1.0
 22 |  */
 23 | /**
 24 |  * @exports $ as jQuery
 25 |  */
 26 | /**
 27 |  * @ignore
 28 |  */
 29 | (function ($) {
 30 |   var
 31 |     rdfNs = "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
 32 |   
 33 |     addAttribute = function (parent, namespace, name, value) {
 34 |       var doc = parent.ownerDocument,
 35 |         a;
 36 |       if (namespace !== undefined && namespace !== null) {
 37 |         if (doc.createAttributeNS) {
 38 |           a = doc.createAttributeNS(namespace, name);
 39 |           a.nodeValue = value;
 40 |           parent.attributes.setNamedItemNS(a);
 41 |         } else {
 42 |           a = doc.createNode(2, name, namespace);
 43 |           a.nodeValue = value;
 44 |           parent.attributes.setNamedItem(a);
 45 |         }
 46 |       } else {
 47 |         a = doc.createAttribute(name);
 48 |         a.nodeValue = value;
 49 |         parent.attributes.setNamedItem(a);
 50 |       }
 51 |       return parent;
 52 |     },
 53 | 
 54 |     createXmlnsAtt = function (parent, namespace, prefix) {
 55 |       if (namespace === 'http://www.w3.org/XML/1998/namespace' || namespace === 'http://www.w3.org/2000/xmlns/') {
 56 |       } else if (prefix) {
 57 |         addAttribute(parent, 'http://www.w3.org/2000/xmlns/', 'xmlns:' + prefix, namespace);
 58 |       } else {
 59 |         addAttribute(parent, undefined, 'xmlns', namespace);
 60 |       }
 61 |       return parent;
 62 |     },
 63 | 
 64 |     createDocument = function (namespace, name) {
 65 |       var doc, xmlns = '', prefix, addAttribute = false;
 66 |       if (namespace !== undefined && namespace !== null) {
 67 |         if (/:/.test(name)) {
 68 |           prefix = /([^:]+):/.exec(name)[1];
 69 |         }
 70 |         addAttribute = true;
 71 |       }
 72 |       if (document.implementation &&
 73 |           document.implementation.createDocument) {
 74 |         doc = document.implementation.createDocument(namespace, name, null);
 75 |         if (addAttribute) {
 76 |           createXmlnsAtt(doc.documentElement, namespace, prefix);
 77 |         }
 78 |         return doc;
 79 |       } else {
 80 |         doc = new ActiveXObject("Microsoft.XMLDOM");
 81 |         doc.async = "false";
 82 |         if (prefix === undefined) {
 83 |           xmlns = ' xmlns="' + namespace + '"';
 84 |         } else {
 85 |           xmlns = ' xmlns:' + prefix + '="' + namespace + '"';
 86 |         }
 87 |         doc.loadXML('<' + name + xmlns + '/>');
 88 |         return doc;
 89 |       }
 90 |     },
 91 | 
 92 |     appendElement = function (parent, namespace, name, indent) {
 93 |       var doc = parent.ownerDocument,
 94 |         e;
 95 |       if (namespace !== undefined && namespace !== null) {
 96 |         e = doc.createElementNS ? doc.createElementNS(namespace, name) : doc.createNode(1, name, namespace);
 97 |       } else {
 98 |         e = doc.createElement(name);
 99 |       }
100 |       if (indent !== -1) {
101 |         appendText(parent, '\n');
102 |         if (indent === 0) {
103 |           appendText(parent, '\n');
104 |         } else {
105 |           appendText(parent, '  ');
106 |         }
107 |       }
108 |       parent.appendChild(e);
109 |       return e;
110 |     },
111 | 
112 |     appendText = function (parent, text) {
113 |       var doc = parent.ownerDocument,
114 |         t;
115 |       t = doc.createTextNode(text);
116 |       parent.appendChild(t);
117 |       return parent;
118 |     },
119 | 
120 |     appendXML = function (parent, xml) {
121 |       var parser, doc, i, child;
122 |       try {
123 |         doc = new ActiveXObject('Microsoft.XMLDOM');
124 |         doc.async = "false";
125 |         doc.loadXML('<temp>' + xml + '</temp>');
126 |       } catch(e) {
127 |         parser = new DOMParser();
128 |         doc = parser.parseFromString('<temp>' + xml + '</temp>', 'text/xml');
129 |       }
130 |       for (i = 0; i < doc.documentElement.childNodes.length; i += 1) {
131 |         parent.appendChild(doc.documentElement.childNodes[i].cloneNode(true));
132 |       }
133 |       return parent;
134 |     },
135 | 
136 |     createRdfXml = function (triples, options) {
137 |       var doc = createDocument(rdfNs, 'rdf:RDF'),
138 |         dump = $.rdf.parsers['application/json'].dump(triples),
139 |         namespaces = options.namespaces || {},
140 |         indent = options.indent || false,
141 |         n, s, se, p, pe, i, v,
142 |         m, local, ns, prefix;
143 |       for (n in namespaces) {
144 |         createXmlnsAtt(doc.documentElement, namespaces[n], n);
145 |       }
146 |       for (s in dump) {
147 |         if (dump[s][$.rdf.type.value] !== undefined) {
148 |           m = /(.+[#\/])([^#\/]+)/.exec(dump[s][$.rdf.type.value][0].value);
149 |           ns = m[1];
150 |           local = m[2];
151 |           for (n in namespaces) {
152 |             if (namespaces[n].toString() === ns) {
153 |               prefix = n;
154 |               break;
155 |             }
156 |           }
157 |           se = appendElement(doc.documentElement, ns, prefix + ':' + local, indent ? 0 : -1);
158 |         } else {
159 |           se = appendElement(doc.documentElement, rdfNs, 'rdf:Description', indent ? 0 : -1);
160 |         }
161 |         if (/^_:/.test(s)) {
162 |           addAttribute(se, rdfNs, 'rdf:nodeID', s.substring(2));
163 |         } else {
164 |           addAttribute(se, rdfNs, 'rdf:about', s);
165 |         }
166 |         for (p in dump[s]) {
167 |           if (p !== $.rdf.type.value.toString() || dump[s][p].length > 1) {
168 |             m = /(.+[#\/])([^#\/]+)/.exec(p);
169 |             ns = m[1];
170 |             local = m[2];
171 |             for (n in namespaces) {
172 |               if (namespaces[n].toString() === ns) {
173 |                 prefix = n;
174 |                 break;
175 |               }
176 |             }
177 |             for (i = (p === $.rdf.type.value.toString() ? 1 : 0); i < dump[s][p].length; i += 1) {
178 |               v = dump[s][p][i];
179 |               pe = appendElement(se, ns, prefix + ':' + local, indent ? 1 : -1);
180 |               if (v.type === 'uri') {
181 |                 addAttribute(pe, rdfNs, 'rdf:resource', v.value);
182 |               } else if (v.type === 'literal') {
183 |                 if (v.datatype !== undefined) {
184 |                   if (v.datatype === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral') {
185 |                     addAttribute(pe, rdfNs, 'rdf:parseType', 'Literal');
186 |                     if (indent) {
187 |                       appendText(pe, '\n    ');
188 |                     }
189 |                     appendXML(pe, v.value);
190 |                     if (indent) {
191 |                       appendText(pe, '\n  ');
192 |                     }
193 |                   } else {
194 |                     addAttribute(pe, rdfNs, 'rdf:datatype', v.datatype);
195 |                     appendText(pe, v.value);
196 |                   }
197 |                 } else if (v.lang !== undefined) {
198 |                   addAttribute(pe, 'http://www.w3.org/XML/1998/namespace', 'xml:lang', v.lang);
199 |                   appendText(pe, v.value);
200 |                 } else {
201 |                   appendText(pe, v.value);
202 |                 }
203 |               } else {
204 |                 // blank node
205 |                 addAttribute(pe, rdfNs, 'rdf:nodeID', v.value.substring(2));
206 |               }
207 |             }
208 |             if (indent) {
209 |               appendText(se, '\n');
210 |             }
211 |           }
212 |         }
213 |       }
214 |       if (indent) {
215 |         appendText(doc.documentElement, '\n\n');
216 |       }
217 |       return doc;
218 |     },
219 | 
220 |     getDefaultNamespacePrefix = function (namespaceUri) {
221 |       switch (namespaceUri) {
222 |         case 'http://www.w3.org/1999/02/22-rdf-syntax-ns':
223 |           return 'rdf';
224 |         case 'http://www.w3.org/XML/1998/namespace':
225 |           return 'xml';
226 |         case 'http://www.w3.org/2000/xmlns/':
227 |           return 'xmlns';
228 |         default:
229 |           throw ('No default prefix mapped for namespace ' + namespaceUri);
230 |       }
231 |     },
232 | 
233 |     hasAttributeNS  = function(elem, namespace, name){
234 |       var basename;
235 |       if (elem.hasAttributeNS) {
236 |         return elem.hasAttributeNS(namespace, name);
237 |       } else {
238 |         try {
239 |           basename = /:/.test(name) ? /:(.+)$/.exec(name)[1] : name;
240 |           return elem.attributes.getQualifiedItem(basename, namespace) !== null;
241 |         } catch (e) {
242 |           return elem.getAttribute(getDefaultNamespacePrefix(namespace) + ':' + name) !== null;
243 |         }
244 |       }
245 |     },
246 | 
247 |     getAttributeNS = function(elem, namespace, name){
248 |       var basename;
249 |       if (elem.getAttributeNS) {
250 |         return elem.getAttributeNS(namespace, name);
251 |       } else {
252 |         try {
253 |           basename = /:/.test(name) ? /:(.+)$/.exec(name)[1] : name;
254 |           return elem.attributes.getQualifiedItem(basename, namespace).nodeValue;
255 |         } catch (e) {
256 |           return elem.getAttribute(getDefaultNamespacePrefix(namespace) + ':' + name);
257 |         }
258 |       }
259 |     },
260 | 
261 |     getLocalName = function(elem){
262 |       return elem.localName || elem.baseName;
263 |     },
264 | 
265 |     parseRdfXmlSubject = function (elem, base) {
266 |       var s, subject;
267 |       if (hasAttributeNS(elem, rdfNs, 'about')) {
268 |         s = getAttributeNS(elem, rdfNs, 'about');
269 |         subject = $.rdf.resource('<' + s + '>', { base: base });
270 |       } else if (hasAttributeNS(elem, rdfNs, 'ID')) {
271 |         s = getAttributeNS(elem, rdfNs, 'ID');
272 |         subject = $.rdf.resource('<#' + s + '>', { base: base });
273 |       } else if (hasAttributeNS(elem, rdfNs, 'nodeID')) {
274 |         s = getAttributeNS(elem, rdfNs, 'nodeID');
275 |         subject = $.rdf.blank('_:' + s);
276 |       } else {
277 |         subject = $.rdf.blank('[]');
278 |       }
279 |       return subject;
280 |     },
281 | 
282 |     parseRdfXmlDescription = function (elem, isDescription, base, lang) {
283 |       var subject, p, property, o, object, reified, lang, i, j, li = 1,
284 |         collection1, collection2, collectionItem, collectionItems = [],
285 |         parseType, serializer, literalOpts = {}, oTriples, triples = [];
286 |       lang = getAttributeNS(elem, 'http://www.w3.org/XML/1998/namespace', 'lang') || lang;
287 |       base = getAttributeNS(elem, 'http://www.w3.org/XML/1998/namespace', 'base') || base;
288 |       if (lang !== null && lang !== undefined && lang !== '') {
289 |         literalOpts = { lang: lang };
290 |       }
291 |       subject = parseRdfXmlSubject(elem, base);
292 |       if (isDescription && (elem.namespaceURI !== rdfNs || getLocalName(elem) !== 'Description')) {
293 |         property = $.rdf.type;
294 |         object = $.rdf.resource('<' + elem.namespaceURI + getLocalName(elem) + '>');
295 |         triples.push($.rdf.triple(subject, property, object));
296 |       }
297 |       for (i = 0; i < elem.attributes.length; i += 1) {
298 |         p = elem.attributes.item(i);
299 |         if (p.namespaceURI !== undefined &&
300 |             p.namespaceURI !== 'http://www.w3.org/2000/xmlns/' &&
301 |             p.namespaceURI !== 'http://www.w3.org/XML/1998/namespace' &&
302 |             p.prefix !== 'xmlns' &&
303 |             p.prefix !== 'xml') {
304 |           if (p.namespaceURI !== rdfNs) {
305 |             property = $.rdf.resource('<' + p.namespaceURI + getLocalName(p) + '>');
306 |             object = $.rdf.literal(literalOpts.lang ? p.nodeValue : '"' + p.nodeValue.replace(/"/g, '\\"') + '"', literalOpts);
307 |             triples.push($.rdf.triple(subject, property, object));
308 |           } else if (getLocalName(p) === 'type') {
309 |             property = $.rdf.type;
310 |             object = $.rdf.resource('<' + p.nodeValue + '>', { base: base });
311 |             triples.push($.rdf.triple(subject, property, object));
312 |           }
313 |         }
314 |       }
315 |       var parentLang = lang;
316 |       for (i = 0; i < elem.childNodes.length; i += 1) {
317 |         p = elem.childNodes[i];
318 |         if (p.nodeType === 1) {
319 |           if (p.namespaceURI === rdfNs && getLocalName(p) === 'li') {
320 |             property = $.rdf.resource('<' + rdfNs + '_' + li + '>');
321 |             li += 1;
322 |           } else {
323 |             property = $.rdf.resource('<' + p.namespaceURI + getLocalName(p) + '>');
324 |           }
325 |           lang = getAttributeNS(p, 'http://www.w3.org/XML/1998/namespace', 'lang') || parentLang;
326 |            if (lang !== null && lang !== undefined && lang !== '') {
327 |              literalOpts = { lang: lang };
328 |           } else {
329 |             literalOpts = {};
330 |           }
331 |           if (hasAttributeNS(p, rdfNs, 'resource')) {
332 |             o = getAttributeNS(p, rdfNs, 'resource');
333 |             object = $.rdf.resource('<' + o + '>', { base: base });
334 |           } else if (hasAttributeNS(p, rdfNs, 'nodeID')) {
335 |             o = getAttributeNS(p, rdfNs, 'nodeID');
336 |             object = $.rdf.blank('_:' + o);
337 |           } else if (hasAttributeNS(p, rdfNs, 'parseType')) {
338 |             parseType = getAttributeNS(p, rdfNs, 'parseType');
339 |             if (parseType === 'Literal') {
340 |               try {
341 |                 serializer = new XMLSerializer();
342 |                 o = serializer.serializeToString(p.getElementsByTagName('*')[0]);
343 |               } catch (e) {
344 |                 o = "";
345 |                 for (j = 0; j < p.childNodes.length; j += 1) {
346 |                   o += p.childNodes[j].xml;
347 |                 }
348 |               }
349 |               object = $.rdf.literal(o, { datatype: rdfNs + 'XMLLiteral' });
350 |             } else if (parseType === 'Resource') {
351 |               oTriples = parseRdfXmlDescription(p, false, base, lang);
352 |               if (oTriples.length > 0) {
353 |                 object = oTriples[oTriples.length - 1].subject;
354 |                 triples = triples.concat(oTriples);
355 |               } else {
356 |                 object = $.rdf.blank('[]');
357 |               }
358 |             } else if (parseType === 'Collection') {
359 |               if (p.getElementsByTagName('*').length > 0) {
360 |                 for (j = 0; j < p.childNodes.length; j += 1) {
361 |                   o = p.childNodes[j];
362 |                   if (o.nodeType === 1) {
363 |                     collectionItems.push(o);
364 |                   }
365 |                 }
366 |                 collection1 = $.rdf.blank('[]');
367 |                 object = collection1;
368 |                 for (j = 0; j < collectionItems.length; j += 1) {
369 |                   o = collectionItems[j];
370 |                   oTriples = parseRdfXmlDescription(o, true, base, lang);
371 |                   if (oTriples.length > 0) {
372 |                     collectionItem = oTriples[oTriples.length - 1].subject;
373 |                     triples = triples.concat(oTriples);
374 |                   } else {
375 |                     collectionItem = parseRdfXmlSubject(o);
376 |                   }
377 |                   triples.push($.rdf.triple(collection1, $.rdf.first, collectionItem));
378 |                   if (j === collectionItems.length - 1) {
379 |                     triples.push($.rdf.triple(collection1, $.rdf.rest, $.rdf.nil));
380 |                   } else {
381 |                     collection2 = $.rdf.blank('[]');
382 |                     triples.push($.rdf.triple(collection1, $.rdf.rest, collection2));
383 |                     collection1 = collection2;
384 |                   }
385 |                 }
386 |               } else {
387 |                 object = $.rdf.nil;
388 |               }
389 |             }
390 |           } else if (hasAttributeNS(p, rdfNs, 'datatype')) {
391 |             o = p.childNodes[0] ? p.childNodes[0].nodeValue : "";
392 |             object = $.rdf.literal(o, { datatype: getAttributeNS(p, rdfNs, 'datatype') });
393 |           } else if (p.getElementsByTagName('*').length > 0) {
394 |             for (j = 0; j < p.childNodes.length; j += 1) {
395 |               o = p.childNodes[j];
396 |               if (o.nodeType === 1) {
397 |                 oTriples = parseRdfXmlDescription(o, true, base, lang);
398 |                 if (oTriples.length > 0) {
399 |                   object = oTriples[oTriples.length - 1].subject;
400 |                   triples = triples.concat(oTriples);
401 |                 } else {
402 |                   object = parseRdfXmlSubject(o);
403 |                 }
404 |               }
405 |             }
406 |           } else if (p.childNodes.length > 0) {
407 |             o = p.childNodes[0].nodeValue;
408 |             object = $.rdf.literal(literalOpts.lang ? o : '"' + o.replace(/"/g, '\\"') + '"', literalOpts);
409 |           } else {
410 |             oTriples = parseRdfXmlDescription(p, false, base, lang);
411 |             if (oTriples.length > 0) {
412 |               object = oTriples[oTriples.length - 1].subject;
413 |               triples = triples.concat(oTriples);
414 |             } else {
415 |               object = $.rdf.blank('[]');
416 |             }
417 |           }
418 |           triples.push($.rdf.triple(subject, property, object));
419 |           if (hasAttributeNS(p, rdfNs, 'ID')) {
420 |             reified = $.rdf.resource('<#' + getAttributeNS(p, rdfNs, 'ID') + '>', { base: base });
421 |             triples.push($.rdf.triple(reified, $.rdf.subject, subject));
422 |             triples.push($.rdf.triple(reified, $.rdf.property, property));
423 |             triples.push($.rdf.triple(reified, $.rdf.object, object));
424 |           }
425 |         }
426 |       }
427 |       return triples;
428 |     },
429 | 
430 |     parseRdfXml = function (doc) {
431 |       var i, lang, d, triples = [];
432 |       if (doc.documentElement.namespaceURI === rdfNs && getLocalName(doc.documentElement) === 'RDF') {
433 |         lang = getAttributeNS(doc.documentElement, 'http://www.w3.org/XML/1998/namespace', 'lang');
434 |         base = getAttributeNS(doc.documentElement, 'http://www.w3.org/XML/1998/namespace', 'base') || $.uri.base();
435 |         triples = $.map(doc.documentElement.childNodes, function (d) {
436 |           if (d.nodeType === 1) {
437 |             return parseRdfXmlDescription(d, true, base, lang);
438 |           } else {
439 |             return null;
440 |           }
441 |         });
442 |         /*
443 |         for (i = 0; i < doc.documentElement.childNodes.length; i += 1) {
444 |           d = doc.documentElement.childNodes[i];
445 |           if (d.nodeType === 1) {
446 |             triples = triples.concat(parseRdfXmlDescription(d, true, base, lang));
447 |           }
448 |         }
449 |         */
450 |       } else {
451 |         triples = parseRdfXmlDescription(doc.documentElement, true);
452 |       }
453 |       return triples;
454 |     };
455 | 
456 |   $.rdf.parsers['application/rdf+xml'] = {
457 |     parse: function (data) {
458 |       var doc;
459 |       try {
460 |         doc = new ActiveXObject("Microsoft.XMLDOM");
461 |         doc.async = "false";
462 |         doc.loadXML(data);
463 |       } catch(e) {
464 |         var parser = new DOMParser();
465 |         doc = parser.parseFromString(data, 'text/xml');
466 |       }
467 |       return doc;
468 |     },
469 |     serialize: function (data) {
470 |       if (data.xml) {
471 |         return data.xml.replace(/\s+$/,'');
472 |       } else {
473 |         serializer = new XMLSerializer();
474 |         return serializer.serializeToString(data);
475 |       }
476 |     },
477 |     triples: parseRdfXml,
478 |     dump: createRdfXml
479 |   };
480 | 
481 | })(jQuery);
482 | 


--------------------------------------------------------------------------------