├── .gitignore ├── index.js ├── .travis.yml ├── test.js ├── .jshintrc ├── README.md ├── package.json ├── LICENSE-MIT └── lib └── treetagger.js /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require("./lib/treetagger"); 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 0.10 4 | before_script: 5 | - npm install -g grunt-cli 6 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | var TreeTagger = require("./"); 2 | var tagger = new TreeTagger(); 3 | tagger.tag("This is a test!", function (err, results) { 4 | console.log(results); 5 | }); 6 | -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "curly": true, 3 | "eqeqeq": true, 4 | "immed": true, 5 | "latedef": true, 6 | "newcap": true, 7 | "noarg": true, 8 | "sub": true, 9 | "undef": true, 10 | "unused": true, 11 | "boss": true, 12 | "eqnull": true, 13 | "node": true, 14 | "es5": true 15 | } 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # treetagger 2 | 3 | Node.js module for interfacing with the TreeTagger toolkit by Helmut Schmid. 4 | 5 | ## Getting Started 6 | Install the module with: `npm install treetagger` 7 | 8 | ```javascript 9 | var Treetagger = require('treetagger'); 10 | var tagger = new Treetagger(); 11 | tagger.tag("This is a test!", function (err, results) { 12 | console.log(results); 13 | }); 14 | 15 | /* 16 | [ { t: 'This', pos: 'DT', l: 'this' }, 17 | { t: 'is', pos: 'VBZ', l: 'be' }, 18 | { t: 'a', pos: 'DT', l: 'a' }, 19 | { t: 'test', pos: 'NN', l: 'test' }, 20 | { t: '!', pos: 'SENT', l: '!' } ] 21 | */ 22 | 23 | ``` 24 | 25 | ## License 26 | Copyright (c) 2013 Thomas Holloway 27 | Licensed under the MIT license. 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "treetagger", 3 | "description": "Node.js module for interfacing with the TreeTagger toolkit by Helmut Schmid.", 4 | "version": "0.1.1", 5 | "homepage": "https://github.com/nyxtom/treetagger", 6 | "author": { 7 | "name": "Thomas Holloway", 8 | "email": "nyxtom@gmail.com" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git://github.com/nyxtom/treetagger.git" 13 | }, 14 | "bugs": { 15 | "url": "https://github.com/nyxtom/treetagger/issues" 16 | }, 17 | "licenses": [ 18 | { 19 | "type": "MIT", 20 | "url": "https://github.com/nyxtom/treetagger/blob/master/LICENSE-MIT" 21 | } 22 | ], 23 | "main": "lib/treetagger", 24 | "engines": { 25 | "node": ">= 0.8.0" 26 | }, 27 | "scripts": { 28 | "test": "grunt nodeunit" 29 | }, 30 | "devDependencies": { 31 | "grunt-contrib-jshint": "~0.1.1", 32 | "grunt-contrib-nodeunit": "~0.1.2", 33 | "grunt-contrib-watch": "~0.2.0", 34 | "grunt": "~0.4.1" 35 | }, 36 | "keywords": [] 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Thomas Holloway 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /lib/treetagger.js: -------------------------------------------------------------------------------- 1 | /* 2 | * treetagger 3 | * https://github.com/nyxtom/treetagger 4 | * 5 | * Copyright (c) 2015 Thomas Holloway, Mathieu Prevel 6 | * Licensed under the MIT license. 7 | */ 8 | 9 | 'use strict'; 10 | 11 | var events = require('events'), 12 | util = require('util'), 13 | fs = require('fs'), 14 | path = require('path'), 15 | spawn = require('child_process').spawn; 16 | 17 | var languages = { 18 | 'utf8': [ 'bulgarian', 'english', 'finnish', 'galician', 'italian', 'polish', 'slovak', 19 | 'swahili', 'dutch', 'estonian', 'french', 'german', 'latin', 'russian' , 'spanish' ] 20 | }; 21 | 22 | function TreeTagger(options) { 23 | events.EventEmitter.call(this); 24 | this.options = options || {}; 25 | this.encoding = this.options.encoding || 'utf8'; 26 | this.language = this.options.language || 'english'; 27 | this.paths = ['.', '/usr/bin', '/usr/local/bin', '/opt/local/bin', 28 | '/Applications/bin', '~/bin', '~/Applications/bin' ]; 29 | this.envVars = ["TREETAGGER", "TREETAGGER_HOME"]; 30 | 31 | // Validate the given encoding and language selected 32 | if (!this.encoding in languages) { 33 | throw new Error("Unsupported encoding detected " + this.encoding); 34 | } 35 | if (languages[this.encoding].indexOf(this.language) < 0) { 36 | throw new Error("Unsupported language detected " + this.language + " for encoding " + this.encoding); 37 | } 38 | 39 | // Set the appropriate bin path 40 | this.binPath = 'tree-tagger-' + this.language; 41 | 42 | this.binPath = findPath(this.binPath, this.paths, this.envVars); 43 | }; 44 | 45 | TreeTagger.prototype.split = function (text) { 46 | var results = []; 47 | var lines = text.trim().split('\n'); 48 | for (var i = 0; i < lines.length; i++) { 49 | var line = lines[i]; 50 | var items = line.split('\t'); 51 | var item = {}; 52 | item.t = items[0]; 53 | item.pos = items[1]; 54 | item.l = items[2]; 55 | results.push(item); 56 | } 57 | return results; 58 | }; 59 | 60 | TreeTagger.prototype.tag = function (text, callback) { 61 | var proc = spawn(this.binPath); 62 | var stdout = [], stderr = [], size = 0; 63 | var _this = this; 64 | proc.stdout.on('data', function (buffer) { 65 | size += buffer.length; 66 | stdout[stdout.length] = buffer; 67 | }); 68 | proc.stderr.on('data', function (buffer) { 69 | stderr[stderr.length] = buffer; 70 | }); 71 | proc.on('error', function (err) { 72 | callback(err); 73 | }); 74 | var exitCode; 75 | proc.on('exit', function (code) { 76 | exitCode = code; 77 | }); 78 | proc.on('close', function () { 79 | if (exitCode > 0) { 80 | callback(new Error(stderr.join(""))); 81 | } 82 | else { 83 | var buffer = new Buffer(size); 84 | var start = 0; 85 | for (var i = 0, l = stdout.length; i < l; i++) { 86 | var chunk = stdout[i]; 87 | chunk.copy(buffer, start); 88 | start += chunk.length; 89 | } 90 | var output = buffer.toString(); 91 | var results = _this.split(output); 92 | callback(null, results); 93 | } 94 | }); 95 | 96 | proc.stdin.setEncoding(_this.encoding) 97 | proc.stdin.write(text); 98 | proc.stdin.end(); 99 | }; 100 | 101 | function findPath(binName, paths, envVars) { 102 | for (var i = 0; i < paths.length; i++) { 103 | var p = path.join(paths[i], binName); 104 | if (fs.existsSync(p)) { 105 | return p; 106 | } 107 | } 108 | 109 | for (var i = 0; i < envVars.length; i++) { 110 | var envVar = envVars[i]; 111 | if (envVar in process.env) { 112 | var p = path.join(process.env[envVar], binName); 113 | if (fs.existsSync(p)) { 114 | return p; 115 | } 116 | } 117 | } 118 | 119 | throw new Error(binName + " was not found on the executable PATH directive"); 120 | }; 121 | 122 | module.exports = TreeTagger; 123 | --------------------------------------------------------------------------------