├── .gitignore ├── LICENSE ├── README.md ├── bower.json ├── bower_components ├── jquery │ ├── .bower.json │ ├── MIT-LICENSE.txt │ ├── bower.json │ ├── dist │ │ ├── jquery.js │ │ ├── jquery.min.js │ │ └── jquery.min.map │ └── src │ │ ├── ajax.js │ │ ├── ajax │ │ ├── jsonp.js │ │ ├── load.js │ │ ├── parseJSON.js │ │ ├── parseXML.js │ │ ├── script.js │ │ ├── var │ │ │ ├── nonce.js │ │ │ └── rquery.js │ │ └── xhr.js │ │ ├── attributes.js │ │ ├── attributes │ │ ├── attr.js │ │ ├── classes.js │ │ ├── prop.js │ │ ├── support.js │ │ └── val.js │ │ ├── callbacks.js │ │ ├── core.js │ │ ├── core │ │ ├── access.js │ │ ├── init.js │ │ ├── parseHTML.js │ │ ├── ready.js │ │ └── var │ │ │ └── rsingleTag.js │ │ ├── css.js │ │ ├── css │ │ ├── addGetHookIf.js │ │ ├── curCSS.js │ │ ├── defaultDisplay.js │ │ ├── hiddenVisibleSelectors.js │ │ ├── support.js │ │ ├── swap.js │ │ └── var │ │ │ ├── cssExpand.js │ │ │ ├── getStyles.js │ │ │ ├── isHidden.js │ │ │ ├── rmargin.js │ │ │ └── rnumnonpx.js │ │ ├── data.js │ │ ├── data │ │ ├── Data.js │ │ ├── accepts.js │ │ └── var │ │ │ ├── data_priv.js │ │ │ └── data_user.js │ │ ├── deferred.js │ │ ├── deprecated.js │ │ ├── dimensions.js │ │ ├── effects.js │ │ ├── effects │ │ ├── Tween.js │ │ └── animatedSelector.js │ │ ├── event.js │ │ ├── event │ │ ├── ajax.js │ │ ├── alias.js │ │ └── support.js │ │ ├── exports │ │ ├── amd.js │ │ └── global.js │ │ ├── intro.js │ │ ├── jquery.js │ │ ├── manipulation.js │ │ ├── manipulation │ │ ├── _evalUrl.js │ │ ├── support.js │ │ └── var │ │ │ └── rcheckableType.js │ │ ├── offset.js │ │ ├── outro.js │ │ ├── queue.js │ │ ├── queue │ │ └── delay.js │ │ ├── selector-native.js │ │ ├── selector-sizzle.js │ │ ├── selector.js │ │ ├── serialize.js │ │ ├── sizzle │ │ └── dist │ │ │ ├── sizzle.js │ │ │ ├── sizzle.min.js │ │ │ └── sizzle.min.map │ │ ├── traversing.js │ │ ├── traversing │ │ ├── findFilter.js │ │ └── var │ │ │ └── rneedsContext.js │ │ ├── var │ │ ├── arr.js │ │ ├── class2type.js │ │ ├── concat.js │ │ ├── 
hasOwn.js │ │ ├── indexOf.js │ │ ├── pnum.js │ │ ├── push.js │ │ ├── rnotwhite.js │ │ ├── slice.js │ │ ├── strundefined.js │ │ ├── support.js │ │ └── toString.js │ │ └── wrap.js └── underscore │ ├── .bower.json │ ├── .eslintrc │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── bower.json │ ├── component.json │ ├── package.json │ ├── underscore-min.js │ ├── underscore-min.map │ └── underscore.js ├── dict ├── adj.exc.json ├── adv.exc.json ├── index.adj.json ├── index.adv.json ├── index.noun.json ├── index.verb.json ├── noun.exc.json └── verb.exc.json ├── html └── lemmatizer_sample.html ├── js └── lemmatizer.js └── test ├── lemmatizer_qunit.html └── lemmatizer_qunit.js /.gitignore: -------------------------------------------------------------------------------- 1 | /work 2 | .DS_Store 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Takafumi Yamano 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | JavaScript Lemmatizer 2 | ==== 3 | 4 | JavaScript Lemmatizer is a lemmatization library for JavaScript that retrieves the base form of an inflected English word. 5 | 6 | Inspired by [Ruby Lemmatizer](https://github.com/yohasebe/lemmatizer), but the returned values and the algorithm differ from it. 7 | 8 | ## Requirements 9 | 10 | Depends on Underscore.js. 11 | 12 | - [Underscore.js](http://underscorejs.org/) 13 | 14 | ## Demo 15 | 16 | A sample HTML file is bundled with this library. The sample HTML code and the demo page are as follows. 17 | 18 | - [lemmatizer_sample.html](https://github.com/takafumir/javascript-lemmatizer/blob/master/html/lemmatizer_sample.html) 19 | - [Demo page](http://takafumir.github.io/javascript-lemmatizer/html/lemmatizer_sample.html) 20 | 21 | The sample HTML code depends on jQuery. 22 | 23 | - [jQuery](http://jquery.com/) 24 | 25 | ## Check 26 | 27 | The library has been checked in the following web browsers on Mac OS X. 28 | 29 | - Firefox 35.0 30 | - Google Chrome 40.0 31 | - Safari 6.1.6 32 | - Opera 25.0 33 | 34 | ## Install 35 | ##### 1. Download and unzip JavaScript Lemmatizer, and then put it in your project. 36 | 37 | The dict and js directories of JavaScript Lemmatizer are required, so you can put it in your project like this. 38 | 39 | ``` 40 | your-project 41 | ├ index.html 42 | ├ javascript-lemmatizer 43 | ├ dict 44 | ├ js 45 | ├ bower_components 46 | ├ jquery 47 | ├ underscore 48 | ``` 49 | 50 | ##### 2. 
Load Underscore.js and JavaScript Lemmatizer in your HTML as in the following code. 51 | 52 | ```html 53 | 54 | 55 | ``` 56 | 57 | Or you can load Underscore.js the way you like. 58 | 59 | As an option, you can load jQuery, if you need it in your project. 60 | 61 | ```html 62 | 63 | 64 | 65 | ``` 66 | 67 | ##### 3. Use JavaScript Lemmatizer in your JavaScript code according to the Usage. 68 | 69 | See also. 70 | - [lemmatizer_sample.html](https://github.com/takafumir/javascript-lemmatizer/blob/master/html/lemmatizer_sample.html) 71 | - [Demo page](http://takafumir.github.io/javascript-lemmatizer/html/lemmatizer_sample.html) 72 | 73 | ## Usage 74 | 75 | You can use the `Lemmatizer#lemmas` or `Lemmatizer#only_lemmas` methods as in the following sample in your JavaScript code. 76 | 77 | ```javascript 78 | // initialize Lemmatizer. 79 | var lemmatizer = new Lemmatizer(); 80 | 81 | // retrieve a lemma with a part of speech. 82 | // you can assign 'verb' or 'noun' or 'adj' or 'adv' as a part of speech. 83 | lemmatizer.lemmas('desks', 'noun'); // => [ ['desk', 'noun'] ] 84 | lemmatizer.lemmas('talked', 'verb'); // => [ ['talk', 'verb'] ] 85 | lemmatizer.lemmas('coded', 'verb'); // => [ ['code', 'verb'] ] 86 | 87 | // of course, it also works for irregularly inflected words. 88 | lemmatizer.lemmas('went', 'verb'); // => [ ['go', 'verb'] ] 89 | lemmatizer.lemmas('written', 'verb'); // => [ ['write', 'verb'] ] 90 | lemmatizer.lemmas('better', 'adj'); // => [ ['better', 'adj'], ['good', 'adj'] ] 91 | 92 | // when multiple base forms are found, return all of them. 93 | lemmatizer.lemmas('leaves', 'noun'); // => [ ['leave', 'noun'], ['leaf', 'noun'] ] 94 | 95 | // retrieve a lemma without a part of speech. 
96 | lemmatizer.lemmas('sitting'); // => [ ['sit', 'verb'], ['sitting', 'noun'], ['sitting', 'adj'] ] 97 | lemmatizer.lemmas('oxen'); // => [ ['oxen', 'noun'], ['ox', 'noun'] ] 98 | lemmatizer.lemmas('leaves'); // => [ ['leave', 'verb'], ['leave', 'noun'], ['leaf', 'noun'] ] 99 | 100 | // retrieve only lemmas, not including parts of speech in the returned value. 101 | lemmatizer.only_lemmas('desks', 'noun'); // => [ 'desk' ] 102 | lemmatizer.only_lemmas('coded', 'verb'); // => [ 'code' ] 103 | lemmatizer.only_lemmas('priorities'); // => [ 'priority' ] 104 | lemmatizer.only_lemmas('leaves'); // => [ 'leave', 'leaf' ] 105 | ``` 106 | 107 | See also. 108 | - [lemmatizer_sample.html](https://github.com/takafumir/javascript-lemmatizer/blob/master/html/lemmatizer_sample.html) 109 | - [Demo page](http://takafumir.github.io/javascript-lemmatizer/html/lemmatizer_sample.html) 110 | 111 | ## Limitations 112 | ```javascript 113 | // Lemmatizer leaves alone a word that is not included in its dictionary index. 114 | lemmatizer.lemmas('MacBooks', 'noun'); // => [ ['MacBooks', 'noun'] ] 115 | ``` 116 | 117 | ## Changelog 118 | 119 | ##### v0.0.2 120 | 2015/01/30 121 | With v0.0.2, the returned value also includes the input word itself when that word is included in the lemma dictionary index, as in the following example. 122 | ```javascript 123 | lemmatizer.lemmas('matter'); // => [ ['matter', 'verb'], ['matter', 'noun'], ['matte', 'adj'], ['matt', 'adj'], ['mat', 'adj'] ] 124 | ``` 125 | 126 | With v0.0.1 127 | ```javascript 128 | lemmatizer.lemmas('matter'); // => [ ['matte', 'adj'], ['matt', 'adj'], ['mat', 'adj'] ] 129 | ``` 130 | 131 | ##### v0.0.1 132 | 2015/01/27 133 | Released JavaScript Lemmatizer v0.0.1 134 | 135 | ## Contribution 136 | 137 | 1. Fork it ( https://github.com/takafumir/javascript-lemmatizer/fork ) 138 | 1. Create your feature branch (git checkout -b my-new-feature) 139 | 1. Commit your changes (git commit -am 'Add some feature') 140 | 1. 
Push to the branch (git push origin my-new-feature) 141 | 1. Create a new Pull Request 142 | 143 | ## Licence 144 | 145 | [MIT License](https://github.com/takafumir/javascript-lemmatizer/blob/master/LICENSE) 146 | 147 | ## Author 148 | 149 | [Takafumi Yamano](https://github.com/takafumir) 150 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "JavaScript Lemmatizer", 3 | "version": "0.0.2", 4 | "homepage": "https://github.com/takafumir/javascript-lemmatizer", 5 | "authors": [ 6 | "Takafumi Yamano" 7 | ], 8 | "description": "JavaScript Lemmatizer is a lemmatization library to retrieve a base form from an inflected form word in English.", 9 | "keywords": [ 10 | "javascript", 11 | "lemmatizer", 12 | "lemmatization" 13 | ], 14 | "license": "MIT", 15 | "ignore": [ 16 | "**/.*", 17 | "node_modules", 18 | "bower_components", 19 | "test", 20 | "tests" 21 | ], 22 | "dependencies": { 23 | "jquery": "~2.1.3", 24 | "underscore": "~1.7.0" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /bower_components/jquery/.bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jquery", 3 | "version": "2.1.3", 4 | "main": "dist/jquery.js", 5 | "license": "MIT", 6 | "ignore": [ 7 | "**/.*", 8 | "build", 9 | "speed", 10 | "test", 11 | "*.md", 12 | "AUTHORS.txt", 13 | "Gruntfile.js", 14 | "package.json" 15 | ], 16 | "devDependencies": { 17 | "sizzle": "2.1.1-jquery.2.1.2", 18 | "requirejs": "2.1.10", 19 | "qunit": "1.14.0", 20 | "sinon": "1.8.1" 21 | }, 22 | "keywords": [ 23 | "jquery", 24 | "javascript", 25 | "library" 26 | ], 27 | "homepage": "https://github.com/jquery/jquery", 28 | "_release": "2.1.3", 29 | "_resolution": { 30 | "type": "version", 31 | "tag": "2.1.3", 32 | "commit": "8f2a9d9272d6ed7f32d3a484740ab342c02541e0" 33 | }, 34 | 
"_source": "git://github.com/jquery/jquery.git", 35 | "_target": "~2.1.3", 36 | "_originalSource": "jquery", 37 | "_direct": true 38 | } -------------------------------------------------------------------------------- /bower_components/jquery/MIT-LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2014 jQuery Foundation and other contributors 2 | http://jquery.com/ 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /bower_components/jquery/bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jquery", 3 | "version": "2.1.3", 4 | "main": "dist/jquery.js", 5 | "license": "MIT", 6 | "ignore": [ 7 | "**/.*", 8 | "build", 9 | "speed", 10 | "test", 11 | "*.md", 12 | "AUTHORS.txt", 13 | "Gruntfile.js", 14 | "package.json" 15 | ], 16 | "devDependencies": { 17 | "sizzle": "2.1.1-jquery.2.1.2", 18 | "requirejs": "2.1.10", 19 | "qunit": "1.14.0", 20 | "sinon": "1.8.1" 21 | }, 22 | "keywords": [ 23 | "jquery", 24 | "javascript", 25 | "library" 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /bower_components/jquery/src/ajax/jsonp.js: -------------------------------------------------------------------------------- 1 | define([ 2 | "../core", 3 | "./var/nonce", 4 | "./var/rquery", 5 | "../ajax" 6 | ], function( jQuery, nonce, rquery ) { 7 | 8 | var oldCallbacks = [], 9 | rjsonp = /(=)\?(?=&|$)|\?\?/; 10 | 11 | // Default jsonp settings 12 | jQuery.ajaxSetup({ 13 | jsonp: "callback", 14 | jsonpCallback: function() { 15 | var callback = oldCallbacks.pop() || ( jQuery.expando + "_" + ( nonce++ ) ); 16 | this[ callback ] = true; 17 | return callback; 18 | } 19 | }); 20 | 21 | // Detect, normalize options and install callbacks for jsonp requests 22 | jQuery.ajaxPrefilter( "json jsonp", function( s, originalSettings, jqXHR ) { 23 | 24 | var callbackName, overwritten, responseContainer, 25 | jsonProp = s.jsonp !== false && ( rjsonp.test( s.url ) ? 
26 | "url" : 27 | typeof s.data === "string" && !( s.contentType || "" ).indexOf("application/x-www-form-urlencoded") && rjsonp.test( s.data ) && "data" 28 | ); 29 | 30 | // Handle iff the expected data type is "jsonp" or we have a parameter to set 31 | if ( jsonProp || s.dataTypes[ 0 ] === "jsonp" ) { 32 | 33 | // Get callback name, remembering preexisting value associated with it 34 | callbackName = s.jsonpCallback = jQuery.isFunction( s.jsonpCallback ) ? 35 | s.jsonpCallback() : 36 | s.jsonpCallback; 37 | 38 | // Insert callback into url or form data 39 | if ( jsonProp ) { 40 | s[ jsonProp ] = s[ jsonProp ].replace( rjsonp, "$1" + callbackName ); 41 | } else if ( s.jsonp !== false ) { 42 | s.url += ( rquery.test( s.url ) ? "&" : "?" ) + s.jsonp + "=" + callbackName; 43 | } 44 | 45 | // Use data converter to retrieve json after script execution 46 | s.converters["script json"] = function() { 47 | if ( !responseContainer ) { 48 | jQuery.error( callbackName + " was not called" ); 49 | } 50 | return responseContainer[ 0 ]; 51 | }; 52 | 53 | // force json dataType 54 | s.dataTypes[ 0 ] = "json"; 55 | 56 | // Install callback 57 | overwritten = window[ callbackName ]; 58 | window[ callbackName ] = function() { 59 | responseContainer = arguments; 60 | }; 61 | 62 | // Clean-up function (fires after converters) 63 | jqXHR.always(function() { 64 | // Restore preexisting value 65 | window[ callbackName ] = overwritten; 66 | 67 | // Save back as free 68 | if ( s[ callbackName ] ) { 69 | // make sure that re-using the options doesn't screw things around 70 | s.jsonpCallback = originalSettings.jsonpCallback; 71 | 72 | // save the callback name for future use 73 | oldCallbacks.push( callbackName ); 74 | } 75 | 76 | // Call if it was a function and we have a response 77 | if ( responseContainer && jQuery.isFunction( overwritten ) ) { 78 | overwritten( responseContainer[ 0 ] ); 79 | } 80 | 81 | responseContainer = overwritten = undefined; 82 | }); 83 | 84 | // Delegate to script 
85 | return "script"; 86 | } 87 | }); 88 | 89 | }); 90 | -------------------------------------------------------------------------------- /bower_components/jquery/src/ajax/load.js: -------------------------------------------------------------------------------- 1 | define([ 2 | "../core", 3 | "../core/parseHTML", 4 | "../ajax", 5 | "../traversing", 6 | "../manipulation", 7 | "../selector", 8 | // Optional event/alias dependency 9 | "../event/alias" 10 | ], function( jQuery ) { 11 | 12 | // Keep a copy of the old load method 13 | var _load = jQuery.fn.load; 14 | 15 | /** 16 | * Load a url into a page 17 | */ 18 | jQuery.fn.load = function( url, params, callback ) { 19 | if ( typeof url !== "string" && _load ) { 20 | return _load.apply( this, arguments ); 21 | } 22 | 23 | var selector, type, response, 24 | self = this, 25 | off = url.indexOf(" "); 26 | 27 | if ( off >= 0 ) { 28 | selector = jQuery.trim( url.slice( off ) ); 29 | url = url.slice( 0, off ); 30 | } 31 | 32 | // If it's a function 33 | if ( jQuery.isFunction( params ) ) { 34 | 35 | // We assume that it's the callback 36 | callback = params; 37 | params = undefined; 38 | 39 | // Otherwise, build a param string 40 | } else if ( params && typeof params === "object" ) { 41 | type = "POST"; 42 | } 43 | 44 | // If we have elements to modify, make the request 45 | if ( self.length > 0 ) { 46 | jQuery.ajax({ 47 | url: url, 48 | 49 | // if "type" variable is undefined, then "GET" method will be used 50 | type: type, 51 | dataType: "html", 52 | data: params 53 | }).done(function( responseText ) { 54 | 55 | // Save response for use in complete callback 56 | response = arguments; 57 | 58 | self.html( selector ? 59 | 60 | // If a selector was specified, locate the right elements in a dummy div 61 | // Exclude scripts to avoid IE 'Permission Denied' errors 62 | jQuery("
").append( jQuery.parseHTML( responseText ) ).find( selector ) : 63 | 64 | // Otherwise use the full result 65 | responseText ); 66 | 67 | }).complete( callback && function( jqXHR, status ) { 68 | self.each( callback, response || [ jqXHR.responseText, status, jqXHR ] ); 69 | }); 70 | } 71 | 72 | return this; 73 | }; 74 | 75 | }); 76 | -------------------------------------------------------------------------------- /bower_components/jquery/src/ajax/parseJSON.js: -------------------------------------------------------------------------------- 1 | define([ 2 | "../core" 3 | ], function( jQuery ) { 4 | 5 | // Support: Android 2.3 6 | // Workaround failure to string-cast null input 7 | jQuery.parseJSON = function( data ) { 8 | return JSON.parse( data + "" ); 9 | }; 10 | 11 | return jQuery.parseJSON; 12 | 13 | }); 14 | -------------------------------------------------------------------------------- /bower_components/jquery/src/ajax/parseXML.js: -------------------------------------------------------------------------------- 1 | define([ 2 | "../core" 3 | ], function( jQuery ) { 4 | 5 | // Cross-browser xml parsing 6 | jQuery.parseXML = function( data ) { 7 | var xml, tmp; 8 | if ( !data || typeof data !== "string" ) { 9 | return null; 10 | } 11 | 12 | // Support: IE9 13 | try { 14 | tmp = new DOMParser(); 15 | xml = tmp.parseFromString( data, "text/xml" ); 16 | } catch ( e ) { 17 | xml = undefined; 18 | } 19 | 20 | if ( !xml || xml.getElementsByTagName( "parsererror" ).length ) { 21 | jQuery.error( "Invalid XML: " + data ); 22 | } 23 | return xml; 24 | }; 25 | 26 | return jQuery.parseXML; 27 | 28 | }); 29 | -------------------------------------------------------------------------------- /bower_components/jquery/src/ajax/script.js: -------------------------------------------------------------------------------- 1 | define([ 2 | "../core", 3 | "../ajax" 4 | ], function( jQuery ) { 5 | 6 | // Install script dataType 7 | jQuery.ajaxSetup({ 8 | accepts: { 9 | script: 
"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript" 10 | }, 11 | contents: { 12 | script: /(?:java|ecma)script/ 13 | }, 14 | converters: { 15 | "text script": function( text ) { 16 | jQuery.globalEval( text ); 17 | return text; 18 | } 19 | } 20 | }); 21 | 22 | // Handle cache's special case and crossDomain 23 | jQuery.ajaxPrefilter( "script", function( s ) { 24 | if ( s.cache === undefined ) { 25 | s.cache = false; 26 | } 27 | if ( s.crossDomain ) { 28 | s.type = "GET"; 29 | } 30 | }); 31 | 32 | // Bind script tag hack transport 33 | jQuery.ajaxTransport( "script", function( s ) { 34 | // This transport only deals with cross domain requests 35 | if ( s.crossDomain ) { 36 | var script, callback; 37 | return { 38 | send: function( _, complete ) { 39 | script = jQuery(" 9 | 10 | 11 | 12 | 68 | 69 |
70 | English word : 71 |
72 | Part of speech: 73 | 80 |
81 | 82 |

83 | *** Show base form here *** 84 |
/*
 * JavaScript Lemmatizer v0.0.2
 * https://github.com/takafumir/javascript-lemmatizer
 * MIT License
 * by Takafumi Yamano
 */

// Polyfill String#endsWith only when the runtime does not provide it.
// The guard must look at String.prototype: `String.endsWith` (a static) never
// exists, so testing it would always replace the native implementation.
if (typeof String.prototype.endsWith !== "function") {
  String.prototype.endsWith = function(suffix) {
    var start = this.length - suffix.length;
    return start >= 0 && this.indexOf(suffix, start) === start;
  };
}

/**
 * Lemmatizer constructor.
 *
 * Loads the dictionary files listed in `wn_files` (through localStorage, see
 * `open_file`) and builds the in-memory `wordlists` and `exceptions` tables,
 * one per part of speech ('noun', 'verb', 'adj', 'adv').
 *
 * Relies on the globals `_` (Underscore.js), `localStorage` and
 * `XMLHttpRequest`.
 */
var Lemmatizer = function() {
  // [ index file, exception file ] per part of speech
  this.wn_files = {
    noun: [
      '../dict/index.noun.json',
      '../dict/noun.exc.json'
    ],
    verb: [
      '../dict/index.verb.json',
      '../dict/verb.exc.json'
    ],
    adj: [
      '../dict/index.adj.json',
      '../dict/adj.exc.json'
    ],
    adv: [
      '../dict/index.adv.json',
      '../dict/adv.exc.json'
    ]
  };

  // [ suffix to strip, replacement ] pairs tried for every form
  this.morphological_substitutions = {
    noun: [
      ['ies', 'y'  ],
      ['ves', 'f'  ],
      ['men', 'man']
    ],
    verb: [
      ['ies', 'y'],
      ['ied', 'y'],
      ['cked', 'c'],
      ['cked', 'ck'],
      ['able', 'e'],
      ['able', ''],
      ['ability', 'e'],
      ['ability', '']
    ],
    adj: [
      ['er', ''  ],
      ['est', '' ],
      ['er', 'e'],
      ['est', 'e'],
      ['ier', 'y'],
      ['iest', 'y']
    ],
    adv: [
      ['er', ''  ],
      ['est', '' ],
      ['er', 'e'],
      ['est', 'e'],
      ['ier', 'y'],
      ['iest', 'y']
    ]
  };

  this.wordlists  = {};
  this.exceptions = {};

  var pos;

  // initialize wordlists and exceptions.
  // The per-pos tables are prototype-less so that a looked-up word such as
  // "constructor" or "__defineGetter__" can never hit an inherited
  // Object.prototype member and be mistaken for a dictionary entry.
  for (pos in this.morphological_substitutions) {
    this.wordlists[pos]  = Object.create(null);
    this.exceptions[pos] = Object.create(null);
  }

  // store dictionary data to localStorage from wn_files
  for (pos in this.wn_files) {
    this.load_wordnet_files(pos, this.wn_files[pos][0], this.wn_files[pos][1]);
  }

  // fetch dictionary data from localStorage, then set up wordlists and exceptions
  for (pos in this.wn_files) {
    this.setup_dic_data(pos);
  }
};

// Lemmatizer properties
Lemmatizer.prototype = {
  // replacing the prototype object drops the default back-reference
  constructor: Lemmatizer,

  form: '',
  idx: '_idx',
  exc: '_exc',
  lems: [], // -> [ ["lemma1", "verb"], ["lemma2", "noun"]... ]

  // **************************************************
  // public
  // **************************************************

  /**
   * Return an Array of ["lemma", "pos"] pairs for `form`,
   * like [ ["lemma1", "verb"], ["lemma2", "noun"]... ].
   *
   * @param {string} form  the (possibly inflected) word
   * @param {string} [pos] 'verb', 'noun', 'adj' or 'adv'; when omitted every
   *                       part of speech is tried
   * @returns {Array|undefined} the pairs, ordered verb -> noun -> adv -> adj;
   *                       undefined (after a console warning) when `pos` is
   *                       given but is not one of the four known values
   */
  lemmas: function(form, pos) {
    var self = this;
    this.lems = [];
    this.form = form;

    var parts = ['verb', 'noun', 'adj', 'adv'];
    if ( pos && !_.include( parts, pos ) ) {
      console.log("warning: pos must be 'verb' or 'noun' or 'adj' or 'adv'.");
      return;
    }

    if (!pos) {
      _.each( parts, function(part) { self.irregular_bases(part); } );
      _.each( parts, function(part) { self.regular_bases(part); } );

      // when lemma not found and the form is included in wordlists.
      if ( this.is_lemma_empty() ) {
        _.each( parts, function(part) {
          if ( self.wordlists[part][form] ) {
            self.lems.push([ form, part ]);
          }
        });
      }
      // when lemma not found and the form is not included in wordlists.
      if ( this.is_lemma_empty() ) {
        this.lems.push([ form, '' ]);
      }
    } else {
      this.base_forms(pos);
      if ( this.is_lemma_empty() ) {
        this.lems.push([ form, pos ]);
      }
    }

    // sort to verb -> noun -> adv -> adj
    // (ascending by pos name, then reversed)
    return _.sortBy( this.uniq_lemmas(this.lems), function(val) { return val[1]; } ).reverse();
  },

  /**
   * Return only the unique lemmas without their part of speech,
   * like [ 'high' ] or [ 'leave', 'leaf' ].
   *
   * @param {string} form
   * @param {string} [pos]
   * @returns {Array} lemma strings in the order produced by `lemmas`
   */
  only_lemmas: function(form, pos) {
    var result = _.map( this.lemmas(form, pos), function(val) { return val[0]; } );
    return _.uniq(result);
  },


  // **************************************************
  // private
  // The following properties(methods) are only used by
  // Lemmatizer inside, so don't call them from outside.
  // **************************************************
  is_lemma_empty: function() {
    return this.lems.length === 0;
  },

  // true when `word` is an entry of the `pos` wordlist
  is_known: function(pos, word) {
    var hit = this.wordlists[pos][word];
    return !!hit && hit === word;
  },

  // true when the current form ends with any of the given suffixes
  ends_with_any: function(ends) {
    var form = this.form;
    for (var i = 0; i < ends.length; i++) {
      if ( form.endsWith(ends[i]) ) {
        return true;
      }
    }
    return false;
  },

  // append to `lemmas` every candidate obtained by applying the
  // morphological substitutions of `pos` to the current form
  push_substitutions: function(pos, lemmas) {
    var form = this.form;
    var entries = this.morphological_substitutions[pos];
    for (var i = 0; i < entries.length; i++) {
      var morpho = entries[i][0];
      var origin = entries[i][1];
      if ( form.endsWith(morpho) ) {
        lemmas.push( form.slice( 0, -(morpho.length) ) + origin );
      }
    }
  },

  // set up dictionary data
  load_wordnet_files: function(pos, list, exc) {
    var key_idx = pos + this.idx;
    this.open_file(key_idx, list);
    var key_exc = pos + this.exc;
    this.open_file(key_exc, exc);
  },

  setup_dic_data: function(pos) {
    var self = this;
    var key_idx = pos + this.idx;
    _.each( this.fetch_data(key_idx), function(w) {
      self.wordlists[pos][w] = w;
    });
    var key_exc = pos + this.exc;
    _.each( this.fetch_data(key_exc), function(item) {
      var w = item[0];
      var s = item[1];
      self.exceptions[pos][w] = s;
    });
  },

  // Download `file` synchronously and cache its text under `key`, unless the
  // key is already cached. Throws an Error when the request did not succeed,
  // so that an error page is never cached as if it were dictionary data.
  open_file: function(key, file) {
    if (localStorage.getItem(key)) {
      return;
    }
    var xhr = new XMLHttpRequest();
    xhr.open("GET", file, false);
    xhr.send();
    var data = xhr.responseText;
    // status 0 with a body is what a successful non-HTTP (e.g. file://) load reports
    var ok = (xhr.status >= 200 && xhr.status < 300) || (xhr.status === 0 && !!data);
    if (!ok) {
      throw new Error("Lemmatizer: failed to load '" + file + "' (status " + xhr.status + ")");
    }
    this.store_data(key, data);
  },

  store_data: function(key, data) {
    localStorage.setItem(key, data);
  },

  // parse the cached JSON text stored under `key` (null when nothing is cached)
  fetch_data: function(key) {
    var data = JSON.parse(localStorage.getItem(key));
    return data;
  },
  // end of set up dictionary data

  base_forms: function(pos) {
    this.irregular_bases(pos);
    this.regular_bases(pos);
  },

  // build array lemmas(this.lems) like [ [lemma1, "verb"], [lemma2, "noun"]... ]
  // from the exception (irregular form) table
  irregular_bases: function(pos) {
    var base = this.exceptions[pos][this.form];
    if (base && base !== this.form) {
      this.lems.push( [base, pos] );
    }
  },

  // build array lemmas(this.lems) like [ [lemma1, "verb"], [lemma2, "noun"]... ]
  // from the suffix rules
  regular_bases: function(pos) {
    var bases = null;
    // bases -> [ [lemma1, lemma2, lemma3...], pos ]
    switch (pos) {
      case 'verb':
        bases = this.possible_verb_bases();
        break;
      case 'noun':
        bases = this.possible_noun_bases();
        break;
      case 'adj':
        bases = this.possible_adj_adv_bases('adj');
        break;
      case 'adv':
        bases = this.possible_adj_adv_bases('adv');
        break;
      default:
        break;
    }
    if (bases) {
      this.check_lemmas(bases);
    }
  },

  // check if possible bases are included in lemma wordlists and push
  check_lemmas: function(bases) {
    var self = this;
    // bases -> [ [lemma1, lemma2, lemma3...], pos ]
    var lemmas = bases[0];
    var pos = bases[1];
    _.each( lemmas, function(lemma) {
      if ( self.is_known(pos, lemma) ) {
        self.lems.push( [lemma, pos] );
      }
    });
  },

  possible_verb_bases: function() {
    var form = this.form;
    var lemmas = [];

    if ( this.ends_with_es() ) {
      // goes -> go
      var verb_base = form.slice( 0, -2 );
      lemmas.push( verb_base );
      if ( !this.is_known('verb', verb_base) ) {
        // opposes -> oppose
        lemmas.push( form.slice( 0, -1 ) );
      }
    } else if ( this.ends_with_verb_vowel_ys() ) {
      // annoys -> annoy
      lemmas.push( form.slice( 0, -1 ) );
    } else if ( form.endsWith('ed') && !form.endsWith('ied') && !form.endsWith('cked') ) {
      // saved -> save
      var past_base = form.slice( 0, -1 );
      lemmas.push( past_base );
      if ( !this.is_known('verb', past_base) ) {
        // talked -> talk, but not push like coded -> cod
        lemmas.push( form.slice( 0, -2 ) );
      }
    } else if ( form.endsWith('ed') && this.double_consonant('ed') ) {
      // dragged -> drag
      lemmas.push( form.slice( 0, -3 ) );
      // added -> add
      lemmas.push( form.slice( 0, -2 ) );
      // pirouetted -> pirouette
      lemmas.push( form.slice( 0, -2 ) + 'e' );
    } else if ( form.endsWith('ing') && this.double_consonant('ing') ) {
      // dragging -> drag
      lemmas.push( form.slice( 0, -4 ) );
      // adding -> add
      lemmas.push( form.slice( 0, -3 ) );
      // pirouetting -> pirouette
      lemmas.push( form.slice( 0, -3 ) + 'e' );
    } else if ( form.endsWith('ing') && !this.exceptions['verb'][form] ) {
      // coding -> code
      var ing_base = form.slice( 0, -3 ) + 'e';
      lemmas.push( ing_base );
      if ( !this.is_known('verb', ing_base) ) {
        // talking -> talk, but not push like coding -> cod
        lemmas.push( form.slice( 0, -3 ) );
      }
    } else if ( form.endsWith('able') && this.double_consonant('able') ) {
      lemmas.push( form.slice( 0, -5 ) );
    } else if ( form.endsWith('ability') && this.double_consonant('ability') ) {
      lemmas.push( form.slice( 0, -8 ) );
    } else if ( form.endsWith('s') ) {
      lemmas.push( form.slice( 0, -1 ) );
    }

    this.push_substitutions('verb', lemmas);

    lemmas.push(form);

    return [ lemmas, 'verb' ];
  },

  possible_noun_bases: function() {
    var form = this.form;
    var lemmas = [];

    if ( this.ends_with_es() ) {
      // watches -> watch
      var noun_base = form.slice( 0, -2 );
      lemmas.push( noun_base );
      if ( !this.is_known('noun', noun_base) ) {
        // horses -> horse
        lemmas.push( form.slice( 0, -1 ) );
      }
    } else if ( form.endsWith('s') ) {
      lemmas.push( form.slice( 0, -1 ) );
    }

    this.push_substitutions('noun', lemmas);

    // to push a word like 'us' as it is
    lemmas.push(form);

    return [ lemmas, 'noun' ];
  },

  possible_adj_adv_bases: function(pos) {
    var form = this.form;
    var lemmas = [];

    if ( form.endsWith('est') && this.double_consonant('est') ) {
      // biggest -> big
      lemmas.push( form.slice( 0, -4 ) );
    } else if ( form.endsWith('er') && this.double_consonant('er') ) {
      // bigger -> big
      lemmas.push( form.slice( 0, -3 ) );
    }

    this.push_substitutions(pos, lemmas);

    // to push a word like 'after' as it is
    lemmas.push(form);

    return [ lemmas, pos ];
  },

  // true when the stem left after removing `suffix` ends with
  // vowel + doubled consonant, for like bigger -> big
  double_consonant: function(suffix) {
    var form = this.form;
    // length after removing suffix from form
    var len = form.length - suffix.length;
    return this.is_vowel(form[len - 3]) && !this.is_vowel(form[len - 2]) && form[len - 2] === form[len - 1];
  },

  is_vowel: function(letter) {
    return _.include(["a", "e", "i", "o", "u"], letter);
  },

  // [ ["leave", "verb"], ["leaf", "noun"], ["leave", "verb"], ["leave", "noun"] ];
  // -> [ ["leave", "verb"], ["leaf", "noun"], ["leave", "noun"] ];
  // Pairs whose lemma is a single character (or empty) are dropped as well.
  uniq_lemmas: function(lemmas) {
    var u_lemmas = [];
    var len = lemmas.length;
    for (var i = 0; i < len; i++) {
      var val = lemmas[i];
      if (!this.is_include(u_lemmas, val) && val[0].length > 1) {
        u_lemmas.push(val);
      }
    }
    return u_lemmas;
  },

  is_include: function(lemmas, target) {
    var len = lemmas.length;
    for (var i = 0; i < len; i++) {
      if (lemmas[i][0] === target[0] && lemmas[i][1] === target[1]) {
        return true;
      }
    }
    return false;
  },

  ends_with_es: function() {
    return this.ends_with_any(['ches', 'shes', 'oes', 'ses', 'xes', 'zes']);
  },

  ends_with_verb_vowel_ys: function() {
    return this.ends_with_any(['ays', 'eys', 'iys', 'oys', 'uys']);
  }
};
10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | --------------------------------------------------------------------------------