├── Rakefile ├── lib ├── engtagger │ ├── version.rb │ ├── pos_words.hash │ ├── unknown.yml │ ├── porter.rb │ ├── tags.yml │ └── pos_tags.hash └── engtagger.rb ├── Gemfile ├── .gitignore ├── engtagger.gemspec ├── README.md ├── test └── test_engtagger.rb └── LICENSE /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | require "bundler/gem_tasks" 3 | -------------------------------------------------------------------------------- /lib/engtagger/version.rb: -------------------------------------------------------------------------------- 1 | module EngTagger 2 | VERSION = "0.1.2" 3 | end 4 | -------------------------------------------------------------------------------- /lib/engtagger/pos_words.hash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diasks2/engtagger-1/master/lib/engtagger/pos_words.hash -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in engtagger.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | -------------------------------------------------------------------------------- /lib/engtagger/unknown.yml: -------------------------------------------------------------------------------- 1 | --- #YAML:1.0 2 | "-abr-": { nnp: 1000 } 3 | "-cap-": { nnp: 900, nn: 48, nns: 48, vbg: 2, vbz: 2 } 4 | "-ed-": { vbn: 300, nn: 300, jj: 200, vbd: 200 } 5 | "-hyp-": 
{ jj: 530, nn: 470 } 6 | "-hyp-adj-": { jj: 850, nn: 150 } 7 | "-ing-": { vbg: 800, jj: 180, nnp: 10, nn: 10 } 8 | "-ly-": { rb: 900, jj: 100 } 9 | "-s-": { nnp: 48, nn: 48, nns: 900, vbg: 2, vbz: 2 } 10 | "-sym-": { sym: 1000 } 11 | "-tion-": { nn: 950, nnp: 50 } 12 | "-unknown-": { nn: 875, jj: 195, nns: 20, vbz: 10 } 13 | -------------------------------------------------------------------------------- /engtagger.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path('../lib/engtagger/version', __FILE__) 3 | 4 | Gem::Specification.new do |gem| 5 | gem.authors = ["Yoichiro Hasebe"] 6 | gem.email = ["yohasebe@gmail.com"] 7 | gem.summary = %q{A probability based, corpus-trained English POS tagger} 8 | gem.description = %q{A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained tagger that assigns POS tags to English text based on a lookup dictionary and a set of probability values.} 9 | gem.homepage = "http://github.com/yohasebe/engtagger" 10 | 11 | gem.files = `git ls-files`.split($\) 12 | gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } 13 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) 14 | gem.name = "engtagger" 15 | gem.require_paths = ["lib"] 16 | gem.version = EngTagger::VERSION 17 | end 18 | -------------------------------------------------------------------------------- /lib/engtagger/porter.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # -*- coding: utf-8 -*- 3 | 4 | module Stemmable 5 | 6 | STEP_2_LIST = { 7 | 'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance', 8 | 'izer'=>'ize', 'bli'=>'ble', 9 | 'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous', 10 | 'ization'=>'ize', 'ation'=>'ate', 11 | 'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful', 12 | 'ousness'=>'ous', 'aliti'=>'al', 13 | 'iviti'=>'ive', 
'biliti'=>'ble', 'logi'=>'log' 14 | } 15 | 16 | STEP_3_LIST = { 17 | 'icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic', 18 | 'ical'=>'ic', 'ful'=>'', 'ness'=>'' 19 | } 20 | 21 | 22 | SUFFIX_1_REGEXP = /( 23 | ational | 24 | tional | 25 | enci | 26 | anci | 27 | izer | 28 | bli | 29 | alli | 30 | entli | 31 | eli | 32 | ousli | 33 | ization | 34 | ation | 35 | ator | 36 | alism | 37 | iveness | 38 | fulness | 39 | ousness | 40 | aliti | 41 | iviti | 42 | biliti | 43 | logi)$/x 44 | 45 | 46 | SUFFIX_2_REGEXP = /( 47 | al | 48 | ance | 49 | ence | 50 | er | 51 | ic | 52 | able | 53 | ible | 54 | ant | 55 | ement | 56 | ment | 57 | ent | 58 | ou | 59 | ism | 60 | ate | 61 | iti | 62 | ous | 63 | ive | 64 | ize)$/x 65 | 66 | 67 | C = "[^aeiou]" # consonant 68 | V = "[aeiouy]" # vowel 69 | CC = "#{C}(?>[^aeiouy]*)" # consonant sequence 70 | VV = "#{V}(?>[aeiou]*)" # vowel sequence 71 | 72 | MGR0 = /^(#{CC})?#{VV}#{CC}/o # [cc]vvcc... is m>0 73 | MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o # [cc]vvcc[vv] is m=1 74 | MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o # [cc]vvccvvcc... is m>1 75 | VOWEL_IN_STEM = /^(#{CC})?#{V}/o # vowel in stem 76 | 77 | # 78 | # Porter stemmer in Ruby. 79 | # 80 | # This is the Porter stemming algorithm, ported to Ruby from the 81 | # version coded up in Perl. It's easy to follow against the rules 82 | # in the original paper in: 83 | # 84 | # Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, 85 | # no. 3, pp 130-137, 86 | # 87 | # See also http://www.tartarus.org/~martin/PorterStemmer 88 | # 89 | # Send comments to raypereda@hotmail.com 90 | # 91 | 92 | def stem_porter 93 | 94 | # make a copy of the given object and convert it to a string. 
95 | w = self.dup.to_str 96 | 97 | return w if w.length < 3 98 | 99 | # now map initial y to Y so that the patterns never treat it as vowel 100 | w[0] = 'Y' if w[0] == ?y 101 | 102 | # Step 1a: strip plural endings (sses -> ss, ies -> i, and a final s that is not preceded by another s) 103 | if w =~ /(ss|i)es$/ 104 | w = $` + $1 105 | elsif w =~ /([^s])s$/ 106 | w = $` + $1 107 | end 108 | 109 | # Step 1b: eed -> ee when the stem matches MGR0; otherwise strip ed/ing when the stem contains a vowel, then repair the result (add e after at/bl/iz, undouble a final consonant other than l/s/z, add e to a short consonant-vowel-consonant stem) 110 | if w =~ /eed$/ 111 | w.chop! if $` =~ MGR0 112 | elsif w =~ /(ed|ing)$/ 113 | stem = $` 114 | if stem =~ VOWEL_IN_STEM 115 | w = stem 116 | case w 117 | when /(at|bl|iz)$/ then w << "e" 118 | when /([^aeiouylsz])\1$/ then w.chop! 119 | when /^#{CC}#{V}[^aeiouwxy]$/o then w << "e" 120 | end 121 | end 122 | end 123 | # Step 1c: turn a final y into i when the stem before it contains a vowel 124 | if w =~ /y$/ 125 | stem = $` 126 | w = stem + "i" if stem =~ VOWEL_IN_STEM 127 | end 128 | 129 | # Step 2: replace a suffix listed in SUFFIX_1_REGEXP with its STEP_2_LIST mapping when the stem matches MGR0 130 | if w =~ SUFFIX_1_REGEXP 131 | stem = $` 132 | suffix = $1 133 | # print "stem= " + stem + "\n" + "suffix=" + suffix + "\n" 134 | if stem =~ MGR0 135 | w = stem + STEP_2_LIST[suffix] 136 | end 137 | end 138 | 139 | # Step 3: same as step 2, using the STEP_3_LIST mappings (icate, ative, alize, iciti, ical, ful, ness) 140 | if w =~ /(icate|ative|alize|iciti|ical|ful|ness)$/ 141 | stem = $` 142 | suffix = $1 143 | if stem =~ MGR0 144 | w = stem + STEP_3_LIST[suffix] 145 | end 146 | end 147 | 148 | # Step 4: drop a suffix from SUFFIX_2_REGEXP, or "ion" directly after s/t, when what remains matches MGR1 149 | if w =~ SUFFIX_2_REGEXP 150 | stem = $` 151 | if stem =~ MGR1 152 | w = stem 153 | end 154 | elsif w =~ /(s|t)(ion)$/ 155 | stem = $` + $1 156 | if stem =~ MGR1 157 | w = stem 158 | end 159 | end 160 | 161 | # Step 5: drop a final e when the stem matches MGR1, or matches MEQ1 without being consonants + vowel + a consonant other than w/x/y 162 | if w =~ /e$/ 163 | stem = $` 164 | if (stem =~ MGR1) || 165 | (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o) 166 | w = stem 167 | end 168 | end 169 | # then reduce a final ll to l when the whole word matches MGR1 170 | if w =~ /ll$/ && w =~ MGR1 171 | w.chop! 172 | end 173 | 174 | # and turn initial Y back to y 175 | w[0] = 'y' if w[0] == ?Y 176 | 177 | w 178 | end 179 | 180 | 181 | # 182 | # make the stem_porter the default stem method, just in case we 183 | # feel like having multiple stemmers available later. 
184 | # 185 | alias stem stem_porter 186 | 187 | end 188 | 189 | # Add stem method to all Strings 190 | class String 191 | include Stemmable 192 | end 193 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EngTagger 2 | 3 | English Part-of-Speech Tagger Library; a Ruby port of Lingua::EN::Tagger 4 | 5 | ### Description 6 | 7 | A Ruby port of Perl Lingua::EN::Tagger, a probability based, corpus-trained 8 | tagger that assigns POS tags to English text based on a lookup dictionary and 9 | a set of probability values. The tagger assigns appropriate tags based on 10 | conditional probabilities--it examines the preceding tag to determine the 11 | appropriate tag for the current word. Unknown words are classified according to 12 | word morphology or can be set to be treated as nouns or other parts of speech. 13 | The tagger also extracts as many nouns and noun phrases as it can, using a set 14 | of regular expressions. 15 | 16 | ### Features 17 | 18 | * Assigns POS tags to English text 19 | * Extract noun phrases from tagged text 20 | * etc. 21 | 22 | ### Synopsis: 23 | 24 | require 'rubygems' 25 | require 'engtagger' 26 | 27 | # Create a parser object 28 | tgr = EngTagger.new 29 | 30 | # Sample text 31 | text = "Alice chased the big fat cat." 32 | 33 | # Add part-of-speech tags to text 34 | tagged = tgr.add_tags(text) 35 | 36 | #=> "<nnp>Alice</nnp> <vbd>chased</vbd> <det>the</det> <jj>big</jj> <jj>fat</jj> <nn>cat</nn> <pp>.</pp>" 
37 | 38 | # Get a list of all nouns and noun phrases with occurrence counts 39 | word_list = tgr.get_words(text) 40 | 41 | #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1} 42 | 43 | # Get a readable version of the tagged text 44 | readable = tgr.get_readable(text) 45 | 46 | #=> "Alice/NNP chased/VBD the/DET big/JJ fat/JJ cat/NN ./PP" 47 | 48 | # Get all nouns from a tagged output 49 | nouns = tgr.get_nouns(tagged) 50 | 51 | #=> {"cat"=>1, "Alice"=>1} 52 | 53 | # Get all proper nouns 54 | proper = tgr.get_proper_nouns(tagged) 55 | 56 | #=> {"Alice"=>1} 57 | 58 | 59 | # Get all noun phrases of any syntactic level 60 | # (same as word_list but take a tagged input) 61 | nps = tgr.get_noun_phrases(tagged) 62 | 63 | #=> {"Alice"=>1, "cat"=>1, "fat cat"=>1, "big fat cat"=>1} 64 | 65 | ### Tag Set 66 | 67 | The set of POS tags used here is a modified version of the Penn Treebank tagset. Tags with non-letter characters have been redefined to work better in our data structures. Also, the "Determiner" tag (DET) has been changed from 'DT', in order to avoid confusion with the HTML tag, `
<DT>`. 68 | 69 | CC Conjunction, coordinating and, or 70 | CD Adjective, cardinal number 3, fifteen 71 | DET Determiner this, each, some 72 | EX Pronoun, existential there there 73 | FW Foreign words 74 | IN Preposition / Conjunction for, of, although, that 75 | JJ Adjective happy, bad 76 | JJR Adjective, comparative happier, worse 77 | JJS Adjective, superlative happiest, worst 78 | LS Symbol, list item A, A. 79 | MD Verb, modal can, could, 'll 80 | NN Noun aircraft, data 81 | NNP Noun, proper London, Michael 82 | NNPS Noun, proper, plural Australians, Methodists 83 | NNS Noun, plural women, books 84 | PDT Determiner, prequalifier quite, all, half 85 | POS Possessive 's, ' 86 | PRP Determiner, possessive second mine, yours 87 | PRPS Determiner, possessive their, your 88 | RB Adverb often, not, very, here 89 | RBR Adverb, comparative faster 90 | RBS Adverb, superlative fastest 91 | RP Adverb, particle up, off, out 92 | SYM Symbol * 93 | TO Preposition to 94 | UH Interjection oh, yes, mmm 95 | VB Verb, infinitive take, live 96 | VBD Verb, past tense took, lived 97 | VBG Verb, gerund taking, living 98 | VBN Verb, past/passive participle taken, lived 99 | VBP Verb, base present form take, live 100 | VBZ Verb, present 3SG -s form takes, lives 101 | WDT Determiner, question which, whatever 102 | WP Pronoun, question who, whoever 103 | WPS Determiner, possessive & question whose 104 | WRB Adverb, question when, how, however 105 | 106 | PP Punctuation, sentence ender ., !, ? 
107 | PPC Punctuation, comma , 108 | PPD Punctuation, dollar sign $ 109 | PPL Punctuation, quotation mark left `` 110 | PPR Punctuation, quotation mark right '' 111 | PPS Punctuation, colon, semicolon, ellipsis :, ..., - 112 | LRB Punctuation, left bracket (, {, [ 113 | RRB Punctuation, right bracket ), }, ] 114 | 115 | ### Requirements 116 | 117 | * [Hpricot](http://code.whytheluckystiff.net/hpricot/) (optional) 118 | 119 | ### Install 120 | 121 | (sudo) gem install engtagger 122 | 123 | ### Author 124 | 125 | of this Ruby library 126 | 127 | * Yoichiro Hasebe (yohasebe [at] gmail.com) 128 | 129 | ### Acknowledgement 130 | 131 | This Ruby library is a direct port of Lingua::EN::Tagger available at CPAN. 132 | The credit for the crucial part of its algorithm/design therefore goes to 133 | Aaron Coburn, the author of the original Perl version. 134 | 135 | ### License 136 | 137 | This library is distributed under the GPL. Please see the LICENSE file. 138 | -------------------------------------------------------------------------------- /test/test_engtagger.rb: -------------------------------------------------------------------------------- 1 | # Code Generated by ZenTest v. 3.9.2 2 | # classname: asrt / meth = ratio% 3 | # EngTagger: 0 / 24 = 0.00% 4 | 5 | $ENGTAGGER_LIB = File.join(File.dirname(__FILE__), '..', 'lib') 6 | $LOAD_PATH << $ENGTAGGER_LIB 7 | require 'test/unit' unless defined? $ZENTEST and $ZENTEST 8 | require 'engtagger' 9 | 10 | class TestEngTagger < Test::Unit::TestCase 11 | 12 | @@untagged =<Lisa Raines , a lawyer and director of government relations for the Industrial Biotechnical Association , contends that a judge well-versed in patent law and the concerns of research-based industries would have ruled otherwise . 
18 | EOD 19 | 20 | def setup 21 | @tagger = EngTagger.new 22 | tagpath = File.join($ENGTAGGER_LIB, @tagger.conf[:tag_path]) 23 | wordpath = File.join($ENGTAGGER_LIB, @tagger.conf[:word_path]) 24 | if !File.exists?(tagpath) or !File.exists?(wordpath) 25 | @tagger.install 26 | end 27 | end 28 | 29 | def text_get_ext 30 | model = '[^<]+\s*' 31 | assert_equal(model, EngTagger.get_ext(model, "cd")) 32 | end 33 | 34 | def test_explain_tag 35 | assert_equal("noun", EngTagger.explain_tag("nn")) 36 | assert_equal("verb_infinitive", EngTagger.explain_tag("vb")) 37 | end 38 | 39 | def test_add_tags 40 | assert_instance_of(String, @tagger.add_tags(@@untagged)) 41 | end 42 | 43 | def test_assign_tag 44 | models = []; tests = [] 45 | models += [@tagger.conf[:unknown_word_tag], "sym"] 46 | tests += [["pp","-unknown-"], ["pp", "-sym-"]] 47 | models.length.times do |i| 48 | assert_equal(models[i],@tagger.assign_tag(*tests[i])) 49 | end 50 | tests = [] 51 | tests += [["vb","water"], ["nn", "runs"]] 52 | models.length.times do |i| 53 | result = @tagger.assign_tag(*tests[i]) 54 | assert(EngTagger.hmm.keys.index(result)) 55 | end 56 | end 57 | 58 | def test_classify_unknown_word 59 | assert_equal("*LRB*", @tagger.classify_unknown_word("{")) 60 | assert_equal("*NUM*", @tagger.classify_unknown_word("123.4567")) 61 | assert_equal("*ORD*", @tagger.classify_unknown_word("40th")) 62 | assert_equal("-abr-", @tagger.classify_unknown_word("GT-R")) 63 | assert_equal("-hyp-adj-", @tagger.classify_unknown_word("extremely-high")) 64 | assert_equal("-sym-", @tagger.classify_unknown_word("&&")) 65 | assert_equal("-ing-", @tagger.classify_unknown_word("wikiing")) 66 | assert_equal("-unknown-", @tagger.classify_unknown_word("asefasdf")) 67 | end 68 | 69 | def test_clean_text 70 | test = "I am 100% sure that Dr. Watson is too naive. I'm sorry." 
71 | model = ["I","am","100","%","sure","that","Dr.","Watson","is","too","naive",".","I","'m","sorry","."] 72 | assert_equal(model, @tagger.clean_text(test)) 73 | end 74 | 75 | def test_clean_word 76 | models = []; tests = [] 77 | models += ["*NUM*"] 78 | models += ["Plays"] 79 | models += ["pleadingly"] 80 | tests += ["1973.0820", "Plays", "Pleadingly"] 81 | models.length.times do |i| 82 | assert_equal(models[i], @tagger.clean_word(tests[i])) 83 | end 84 | end 85 | 86 | def test_get_max_noun_phrases 87 | result = @tagger.get_max_noun_phrases(@@tagged) 88 | assert_instance_of(Hash, result) 89 | end 90 | 91 | def test_get_max_noun_regex 92 | assert_instance_of(Regexp, @tagger.get_max_noun_regex) 93 | end 94 | 95 | def test_get_noun_phrases 96 | result = @tagger.get_noun_phrases(@@tagged) 97 | assert_instance_of(Hash, result) 98 | end 99 | 100 | def test_get_nouns 101 | result = @tagger.get_nouns(@@tagged) 102 | assert_instance_of(Hash, result) 103 | end 104 | 105 | def test_get_proper_nouns 106 | test = "BBC means British Broadcasting Corporation ." 107 | result = @tagger.get_proper_nouns(test) 108 | assert_instance_of(Hash, result) 109 | end 110 | 111 | def test_get_readable 112 | test = "I woke up to the sound of pouring rain." 
113 | result = @tagger.get_readable(test) 114 | assert(String, result) 115 | end 116 | 117 | def test_get_sentences 118 | result = @tagger.get_sentences(@@untagged) 119 | assert_equal(4, result.length) 120 | end 121 | 122 | def test_get_words 123 | @tagger.conf[:longest_noun_phrase] = 1 124 | result1 = @tagger.get_words(@@tagged) 125 | @tagger.conf[:longest_noun_phrase] = 10 126 | result2 = @tagger.get_words(@@tagged) 127 | assert_instance_of(Hash, result1) 128 | assert_instance_of(Hash, result2) 129 | end 130 | 131 | def test_reset 132 | @tagger.conf[:current_tag] = 'nn' 133 | @tagger.reset 134 | assert_equal('pp', @tagger.conf[:current_tag]) 135 | end 136 | 137 | def test_split_punct 138 | models = []; texts = [] 139 | models << ["`", "test"]; texts << "`test" 140 | models << ["``", "test"]; texts << "\"test" 141 | models << ["`", "test"]; texts << "'test" 142 | models << ["''"]; texts << '"' 143 | models << ["test", "'"]; texts << "test' " 144 | models << ["-", "test", "-"]; texts << "---test-----" 145 | models << ["test", ",", "test"]; texts << "test,test" 146 | models << ["123,456"]; texts << "123,456" 147 | models << ["test", ":"]; texts << "test:" 148 | models << ["test1", "...", "test2"]; texts << "test1...test2" 149 | models << ["{", "ab","[","(","c",")","[","d","]","]","}"]; texts << "{ab[(c)[d]]}" 150 | models << ["test", "#", "test"]; texts << "test#test" 151 | models << ["I", "'d", "like"]; texts << "I'd like" 152 | models << ["is", "n't", "so"]; texts << "isn't so" 153 | models << ["we", "'re", "all"]; texts << "we're all" 154 | 155 | texts.each_with_index do |text, index| 156 | assert_equal(models[index], @tagger.split_punct(text)) 157 | end 158 | end 159 | 160 | def test_split_sentences 161 | models = []; tests = [] 162 | models << ["He", "is", "a", "u.s.", "army", "officer", "."] 163 | tests << ["He", "is", "a", "u.s.", "army", "officer."] 164 | models << ["He", "is", "Mr.", "Johnson", ".", "He", "'s", "my", "friend", "."] 165 | tests << ["He", 
"is", "Mr.", "Johnson.", "He", "'s", "my", "friend."] 166 | models.length.times do |i| 167 | assert_equal(models[i], @tagger.split_sentences(tests[i])) 168 | end 169 | end 170 | 171 | def test_stem 172 | word = "gets" 173 | old = @tagger.conf[:stem] 174 | @tagger.conf[:stem] = true 175 | assert_equal("get", @tagger.stem(word)) 176 | # the following should not work since we memoize stem method 177 | # @tagger.conf[:stem] = false 178 | # assert_equal("gets", @tagger.stem(word)) 179 | @tagger.conf[:stem] = old 180 | end 181 | 182 | def test_strip_tags 183 | assert_instance_of(String, @tagger.strip_tags(@@tagged)) 184 | end 185 | 186 | def test_valid_text 187 | text = nil 188 | assert(!@tagger.valid_text(text)) 189 | text = "this is test text" 190 | assert(@tagger.valid_text(text)) 191 | text = "" 192 | assert(!@tagger.valid_text(text)) 193 | end 194 | end 195 | 196 | # Number of errors detected: 24 197 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) 
You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. 
To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 
86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details.
305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 309 | 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) year name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 332 | 333 | <signature of Ty Coon>, 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Library General 340 | Public License instead of this License.
341 | Copyright (c) 2012 Yoichiro Hasebe 342 | 343 | MIT License 344 | 345 | Permission is hereby granted, free of charge, to any person obtaining 346 | a copy of this software and associated documentation files (the 347 | "Software"), to deal in the Software without restriction, including 348 | without limitation the rights to use, copy, modify, merge, publish, 349 | distribute, sublicense, and/or sell copies of the Software, and to 350 | permit persons to whom the Software is furnished to do so, subject to 351 | the following conditions: 352 | 353 | The above copyright notice and this permission notice shall be 354 | included in all copies or substantial portions of the Software. 355 | 356 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 357 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 358 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 359 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 360 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 361 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 362 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /lib/engtagger/tags.yml: -------------------------------------------------------------------------------- 1 | --- #YAML:1.0 2 | cc: { cc: 0.000237618, cd: 0.0439594, det: 0.113785, ex: 0.00349638, fw: 0.000101836, in: 0.0513256, jj: 0.108829, jjr: 0.0102855, jjs: 0.00207067, lrb: 0.000339455, ls: 6.7891e-05, md: 0.0106928, nn: 0.118639, nnp: 0.161682, nnps: 0.00332666, nns: 0.0710139, pdt: 0.000577073, ppc: 0.00763773, ppd: 0.0195865, ppl: 0.00590651, pps: 0.000305509, prp: 0.0376116, prps: 0.0172104, rb: 0.0518687, rbr: 0.00264775, rbs: 0.000882583, rp: 0.000101836, rrb: 6.7891e-05, to: 0.00740012, uh: 6.7891e-05, vb: 0.0327574, vbd: 0.0380189, vbg: 0.0210462, vbn: 0.0144268, vbp: 0.011813, vbz: 0.0227435, wdt: 0.00105231, wp: 0.00213857, wps: 0.000237618, wrb: 0.00403951 } 3 | cd: { cc: 0.0167197, cd: 0.198722, det: 0.0292094, fw: 4.45266e-05, in: 0.0892758, jj: 0.0370906, jjr: 0.00211501, jjs: 0.000779215, lrb: 0.00189238, md: 0.00211501, nn: 0.205067, nnp: 0.0134693, nnps: 0.000868268, nns: 0.156511, pos: 0.000734688, pp: 0.0724002, ppc: 0.0968453, ppd: 8.90531e-05, ppl: 0.000690162, ppr: 0.000489792, pps: 0.00761404, prp: 0.000734688, prps: 8.90531e-05, rb: 0.00429681, rbr: 0.00020037, rbs: 0.00020037, rrb: 0.00866042, sym: 0.00013358, to: 0.0362892, vb: 4.45266e-05, vbd: 0.00616693, vbg: 0.00169201, vbn: 0.00311686, vbp: 0.00129127, vbz: 0.00262707, wdt: 0.000756952, wp: 0.000244896, wps: 2.22633e-05, wrb: 0.000690162 } 4 | det: { cc: 0.000711631, cd: 0.0235332, det: 0.00166047, fw: 0.000227327, in: 0.00962679, jj: 0.216346, jjr: 0.00550526, jjs: 0.00935004, lrb: 0.000474421, md: 0.00213489, nn: 0.473709, nnp: 0.113584, nnps: 0.00454653, nns: 0.0734265, pdt: 9.88377e-06, pos: 2.96513e-05, pp: 0.00157152, ppc: 0.0022535, ppd: 0.0090733, ppl: 0.00576224, ppr: 4.94188e-05, pps: 0.000355816, prp: 0.000484305, prps: 0.000642445, rb: 0.0102, rbr: 0.00179885, rbs: 0.00280699, rp: 
6.91864e-05, rrb: 3.95351e-05, to: 0.000286629, uh: 1.97675e-05, vb: 0.00023721, vbd: 0.00230292, vbg: 0.00810469, vbn: 0.00839132, vbp: 0.00171978, vbz: 0.00792678, wdt: 0.000217443, wp: 0.000800585, wrb: 9.88377e-06 } 5 | ex: { det: 0.00185701, md: 0.0807799, pos: 0.0120706, ppc: 0.00371402, prp: 0.000928505, rb: 0.0222841, to: 0.000928505, vb: 0.00185701, vbd: 0.182916, vbp: 0.224698, vbz: 0.46611, wp: 0.00185701 } 6 | fw: { cc: 0.0149254, det: 0.00746269, fw: 0.246269, in: 0.0261194, jj: 0.0261194, lrb: 0.0149254, md: 0.00746269, nn: 0.0708955, nnp: 0.201493, nns: 0.0298507, pos: 0.00746269, pp: 0.108209, ppc: 0.0970149, ppd: 0.00746269, ppl: 0.00373134, ppr: 0.0559702, pps: 0.011194, rrb: 0.00746269, to: 0.0149254, vbn: 0.00373134, vbp: 0.00373134, vbz: 0.0335821 } 7 | in: { cc: 0.00128828, cd: 0.0603768, det: 0.328199, ex: 0.00150162, fw: 0.000180523, in: 0.0200791, jj: 0.0909427, jjr: 0.00493977, jjs: 0.0046936, lrb: 0.000295402, ls: 8.2056e-06, md: 0.000123084, nn: 0.108191, nnp: 0.150409, nnps: 0.00177241, nns: 0.0591706, pdt: 0.00137854, pos: 3.28224e-05, pp: 0.00195293, ppc: 0.00242886, ppd: 0.0276693, ppl: 0.00563725, ppr: 0.000155906, pps: 0.000229757, prp: 0.0301884, prps: 0.03576, rb: 0.0142449, rbr: 0.000722093, rbs: 0.000180523, rp: 1.64112e-05, rrb: 7.38504e-05, sym: 1.64112e-05, to: 0.00224833, uh: 8.2056e-06, vb: 0.000459514, vbd: 0.000672859, vbg: 0.0307956, vbn: 0.00466078, vbp: 0.000246168, vbz: 0.000582598, wdt: 0.00357764, wp: 0.00224833, wps: 3.28224e-05, wrb: 0.0016083 } 8 | jj: { cc: 0.0172469, cd: 0.0161175, det: 0.00358756, ex: 5.31491e-05, fw: 0.000132873, in: 0.0560723, jj: 0.0733723, jjr: 0.000704225, jjs: 0.000345469, lrb: 0.000651076, md: 0.000398618, nn: 0.44992, nnp: 0.0368057, nnps: 0.00159447, nns: 0.23282, pdt: 2.65745e-05, pos: 0.000172735, pp: 0.0243689, ppc: 0.0293649, ppd: 0.00265745, ppl: 0.0020861, ppr: 0.00478342, pps: 0.00309593, prp: 0.00139516, prps: 0.000265745, rb: 0.00356099, rbr: 0.000225884, rbs: 9.30109e-05, 
rp: 0.000106298, rrb: 0.000558065, to: 0.0274382, uh: 1.32873e-05, vb: 0.000106298, vbd: 0.00132873, vbg: 0.00324209, vbn: 0.00191337, vbp: 0.000983258, vbz: 0.00139516, wdt: 0.000119585, wp: 0.000239171, wrb: 0.000637789 } 9 | jjr: { cc: 0.0207869, cd: 0.0027221, det: 0.00668151, in: 0.320713, jj: 0.0487503, lrb: 0.000247463, md: 0.000989854, nn: 0.268003, nnp: 0.0173224, nnps: 0.000494927, nns: 0.169018, pdt: 0.000247463, pp: 0.0514724, ppc: 0.0400891, ppd: 0.000989854, ppl: 0.00123732, ppr: 0.00173224, pps: 0.00445434, prp: 0.00173224, prps: 0.000742391, rb: 0.0054442, rp: 0.00148478, rrb: 0.000494927, to: 0.023509, vb: 0.000247463, vbd: 0.0027221, vbg: 0.00173224, vbn: 0.00247463, vbp: 0.00123732, vbz: 0.00148478, wrb: 0.000742391 } 10 | jjs: { cc: 0.00835422, cd: 0.06934, det: 0.0183793, ex: 0.00125313, in: 0.161654, jj: 0.103592, lrb: 0.000417711, md: 0.000835422, nn: 0.351713, nnp: 0.0108605, nnps: 0.00167084, nns: 0.155388, pdt: 0.00125313, pp: 0.0121136, ppc: 0.0263158, ppd: 0.0108605, ppl: 0.00125313, ppr: 0.00167084, pps: 0.00459482, prp: 0.00459482, prps: 0.000417711, rb: 0.015873, to: 0.0037594, vb: 0.00292398, vbd: 0.00334169, vbg: 0.00584795, vbn: 0.00501253, vbp: 0.0121136, vbz: 0.00292398, wrb: 0.00167084 } 11 | lrb: { cc: 0.0269139, cd: 0.034689, det: 0.0717703, ex: 0.00119617, fw: 0.00299043, in: 0.0843301, jj: 0.0466507, jjr: 0.00179426, ls: 0.00119617, md: 0.00119617, nn: 0.0633971, nnp: 0.330742, nnps: 0.00179426, nns: 0.0161483, pdt: 0.000598086, ppd: 0.166866, ppl: 0.0263158, prp: 0.0197368, prps: 0.0041866, rb: 0.0305024, to: 0.0041866, uh: 0.00358852, vb: 0.0131579, vbd: 0.00119617, vbg: 0.00777512, vbn: 0.020933, vbp: 0.000598086, vbz: 0.000598086, wdt: 0.00717703, wp: 0.00538278, wrb: 0.00239234 } 12 | ls: { jj: 0.03125, nn: 0.015625, pp: 0.40625, ppc: 0.125, pps: 0.109375, rrb: 0.3125 } 13 | md: { cc: 0.00100025, cd: 0.000166708, det: 0.00408435, in: 0.00175044, jj: 0.000500125, jjr: 8.33542e-05, jjs: 8.33542e-05, lrb: 0.000166708, md: 
0.000166708, nn: 0.000833542, nnp: 0.000916896, nns: 0.000666833, pp: 0.0020005, ppc: 0.00325081, ppl: 0.00483454, ppr: 0.000166708, pps: 0.000750188, prp: 0.00500125, prps: 0.000250063, rb: 0.169459, rbr: 0.00125031, rbs: 0.000166708, rrb: 0.000166708, to: 0.0030841, vb: 0.797699, vbd: 0.000500125, vbg: 8.33542e-05, vbn: 0.000583479, vbp: 0.000250063, vbz: 8.33542e-05 } 14 | nn: { cc: 0.0397962, cd: 0.00599719, det: 0.00682082, ex: 0.000109816, fw: 6.10091e-05, in: 0.247752, jj: 0.0087182, jjr: 0.00111037, jjs: 5.49082e-05, lrb: 0.00157403, ls: 6.10091e-06, md: 0.0175218, nn: 0.122201, nnp: 0.00971265, nnps: 7.93118e-05, nns: 0.0785187, pdt: 1.22018e-05, pos: 0.0216765, pp: 0.10859, ppc: 0.115027, ppd: 0.000256238, ppl: 0.00240986, ppr: 0.00516137, pps: 0.0116832, prp: 0.00430724, prps: 0.000274541, rb: 0.0177536, rbr: 0.00253188, rbs: 6.711e-05, rp: 0.000585687, rrb: 0.00173876, sym: 4.88073e-05, to: 0.0394485, vb: 0.00140321, vbd: 0.0485022, vbg: 0.00755293, vbn: 0.0103593, vbp: 0.0040022, vbz: 0.0437618, wdt: 0.00787017, wp: 0.00240376, wps: 0.000170825, wrb: 0.00236715 } 15 | nnp: { cc: 0.0419473, cd: 0.01911, det: 0.0026398, ex: 1.75402e-05, fw: 0.000429734, in: 0.0407721, jj: 0.00841928, jjr: 8.77008e-05, jjs: 8.77008e-06, lrb: 0.0034291, md: 0.0110152, nn: 0.0586719, nnp: 0.377991, nnps: 0.0156283, nns: 0.0241967, pdt: 8.77008e-06, pos: 0.0558742, pp: 0.0544184, ppc: 0.140391, ppd: 0.000236792, ppl: 0.00104364, ppr: 0.00256086, pps: 0.00705115, prp: 0.000868238, prps: 9.64709e-05, rb: 0.00898934, rbr: 0.000315723, rbs: 8.77008e-06, rp: 5.26205e-05, rrb: 0.00352557, sym: 3.50803e-05, to: 0.00755981, vb: 0.000964709, vbd: 0.0653722, vbg: 0.00169263, vbn: 0.000815618, vbp: 0.00399916, vbz: 0.0376938, wdt: 0.000938399, wp: 0.000578826, wps: 8.77008e-06, wrb: 0.000534975 } 16 | nnps: { cc: 0.0787172, cd: 0.000971817, det: 0.00323939, ex: 0.000647878, in: 0.0686751, jj: 0.00615484, jjr: 0.000323939, lrb: 0.00291545, md: 0.0233236, nn: 0.0379009, nnp: 0.284742, 
nnps: 0.0145773, nns: 0.0119857, pos: 0.0255912, pp: 0.0767736, ppc: 0.129252, ppl: 0.00129576, ppr: 0.00323939, pps: 0.0207321, prp: 0.00129576, prps: 0.000323939, rb: 0.0136054, rbr: 0.000323939, rrb: 0.00226757, sym: 0.000323939, to: 0.0132815, vb: 0.000971817, vbd: 0.080013, vbg: 0.00356333, vbn: 0.00259151, vbp: 0.0553936, vbz: 0.0259151, wdt: 0.000323939, wp: 0.00842242, wps: 0.000323939 } 17 | nns: { cc: 0.0593685, cd: 0.00163635, det: 0.0170803, ex: 8.11414e-05, in: 0.23504, jj: 0.0166746, jjr: 0.00119007, jjs: 6.76178e-05, lrb: 0.00425992, md: 0.0277233, nn: 0.0211373, nnp: 0.00301575, nnps: 2.70471e-05, nns: 0.0107783, pdt: 5.40943e-05, pos: 0.00922307, pp: 0.134952, ppc: 0.124079, ppd: 0.000283995, ppl: 0.00232605, ppr: 0.00405707, pps: 0.0189465, prp: 0.00462506, prps: 0.000229901, rb: 0.0311177, rbr: 0.00192035, rbs: 0.000121712, rp: 0.000689702, rrb: 0.00167692, sym: 5.40943e-05, to: 0.0396105, vb: 0.00397593, vbd: 0.0745284, vbg: 0.0141051, vbn: 0.0207451, vbp: 0.0845223, vbz: 0.00806004, wdt: 0.0124011, wp: 0.00695111, wps: 0.000446278, wrb: 0.00221786 } 18 | pdt: { det: 0.913832, jj: 0.00226757, nnp: 0.00226757, prps: 0.0816327 } 19 | pos: { cc: 0.00648268, cd: 0.0250046, det: 0.000370439, fw: 0.000185219, in: 0.00277829, jj: 0.207261, jjr: 0.00231524, jjs: 0.0253751, lrb: 0.000277829, md: 0.000926097, nn: 0.417114, nnp: 0.107242, nnps: 0.00361178, nns: 0.128264, pp: 0.00509354, ppc: 0.00592702, ppd: 0.0095388, ppl: 0.0114836, ppr: 0.000370439, pps: 0.000185219, prp: 9.26097e-05, prps: 0.000277829, rb: 0.00555658, rbr: 0.000370439, rbs: 0.00240785, rrb: 0.000926097, to: 0.000185219, vb: 0.000370439, vbd: 0.00601963, vbg: 0.0115762, vbn: 0.00768661, vbp: 0.000463049, vbz: 0.00388961, wp: 0.000185219, wrb: 0.000185219 } 20 | pp: { cc: 0.0525182, cd: 0.00978944, det: 0.205127, ex: 0.00404302, fw: 0.000164183, in: 0.119587, jj: 0.0381932, jjr: 0.00164183, jjs: 0.00217543, lrb: 0.00441243, ls: 0.000759348, md: 0.000677256, nn: 0.0371465, nnp: 0.185178, 
nnps: 0.00188811, nns: 0.0392193, pdt: 0.000656734, pp: 0.00014366, ppc: 6.15688e-05, ppd: 0.00014366, ppl: 0.0753602, ppr: 0.0597628, pps: 0.00285269, prp: 0.0552888, prps: 0.00722407, rb: 0.0521693, rbr: 0.00186759, rbs: 0.000513073, rrb: 0.00527439, sym: 0.000779871, to: 0.00316053, uh: 0.000595165, vb: 0.00303739, vbd: 0.000718302, vbg: 0.0111439, vbn: 0.00547962, vbp: 0.00034889, vbz: 0.00135451, wdt: 0.000636211, wp: 0.00289373, wps: 2.05229e-05, wrb: 0.00599269 } 21 | ppc: { cc: 0.0919462, cd: 0.0208826, det: 0.13353, ex: 0.00257932, fw: 0.000314148, in: 0.0867545, jj: 0.0418148, jjr: 0.00165341, jjs: 0.000975513, lrb: 0.000314148, ls: 4.96024e-05, md: 0.0101519, nn: 0.0487426, nnp: 0.127924, nnps: 0.00105818, nns: 0.0260578, pdt: 0.000214944, ppd: 0.00221557, ppl: 0.0133265, ppr: 0.0579356, pps: 8.26706e-05, prp: 0.0412692, prps: 0.00410046, rb: 0.0546122, rbr: 0.000777104, rbs: 0.000363751, rp: 8.26706e-05, to: 0.00945752, uh: 0.000396819, vb: 0.00376978, vbd: 0.0529588, vbg: 0.044113, vbn: 0.0206842, vbp: 0.00866388, vbz: 0.031481, wdt: 0.0360278, wp: 0.0122848, wps: 0.00219904, wrb: 0.00823399 } 22 | ppd: { cd: 0.990264, jj: 0.00973559 } 23 | ppl: { cc: 0.0184726, cd: 0.00777202, det: 0.147894, ex: 0.0210633, fw: 0.0032665, in: 0.0637531, jj: 0.113877, jjr: 0.0032665, jjs: 0.0032665, lrb: 0.000225276, md: 0.0117144, nn: 0.0839153, nnp: 0.0762559, nnps: 0.00168957, nns: 0.0355936, pdt: 0.000675828, ppd: 0.000225276, ppl: 0.000337914, pps: 0.000675828, prp: 0.215026, prps: 0.010588, rb: 0.0527146, rbr: 0.00168957, to: 0.00518135, uh: 0.00337914, vb: 0.0259067, vbd: 0.0114891, vbg: 0.0149809, vbn: 0.0111512, vbp: 0.0117144, vbz: 0.0198243, wdt: 0.00146429, wp: 0.00923631, wrb: 0.0117144 } 24 | ppr: { cc: 0.0600601, cd: 0.00462, det: 0.0990991, ex: 0.0017325, fw: 0.000231, in: 0.131786, jj: 0.019635, jjr: 0.000462, jjs: 0.000693001, lrb: 0.00820051, md: 0.0047355, nn: 0.0446985, nnp: 0.135251, nnps: 0.0003465, nns: 0.0294525, pdt: 0.0003465, pp: 0.0021945, 
ppc: 0.000808501, ppd: 0.0001155, ppl: 0.011319, ppr: 0.00646801, pps: 0.00993301, prp: 0.10857, prps: 0.004389, rb: 0.033495, rbr: 0.0003465, rbs: 0.000231, rrb: 0.0042735, sym: 0.0001155, to: 0.0167475, uh: 0.000231, vb: 0.0026565, vbd: 0.0937861, vbg: 0.0125895, vbn: 0.00704551, vbp: 0.0033495, vbz: 0.122661, wdt: 0.00924001, wp: 0.0026565, wps: 0.000577501, wrb: 0.00485101 } 25 | pps: { cc: 0.0653852, cd: 0.123542, det: 0.117299, ex: 0.00180713, fw: 0.000328569, in: 0.0791852, jj: 0.0395926, jjr: 0.00312141, jjs: 0.000985707, lrb: 0.000492854, ls: 0.00279284, md: 0.00903565, nn: 0.042714, nnp: 0.120092, nnps: 0.00114999, nns: 0.034664, pdt: 0.000492854, pp: 0.0180713, ppc: 0.000821423, ppd: 0.0300641, ppl: 0.0668638, ppr: 0.00131428, pps: 0.00443568, prp: 0.0423854, prps: 0.00706424, rb: 0.0542139, rbr: 0.000657138, rbs: 0.000657138, sym: 0.000492854, to: 0.0110071, uh: 0.000657138, vb: 0.0139642, vbd: 0.0185642, vbg: 0.021357, vbn: 0.0137999, vbp: 0.0119928, vbz: 0.0159356, wdt: 0.0108428, wp: 0.00591424, wps: 0.000164285, wrb: 0.00607853 } 26 | prp: { cc: 0.00781945, cd: 0.000936461, det: 0.0118931, ex: 0.000140469, fw: 4.68231e-05, in: 0.0351641, jj: 0.0077258, jjr: 0.00149834, jjs: 9.36461e-05, lrb: 0.000936461, md: 0.123566, nn: 0.00280938, nnp: 0.000936461, nnps: 4.68231e-05, nns: 0.00103011, pdt: 9.36461e-05, pos: 0.00112375, pp: 0.0305755, ppc: 0.021164, ppd: 0.000234115, ppl: 0.00355855, ppr: 0.000421407, pps: 0.00323079, prp: 0.00163881, prps: 0.000140469, rb: 0.0536592, rbr: 0.00145151, rbs: 0.000234115, rp: 0.00421407, rrb: 0.000468231, to: 0.0169968, vb: 0.0125018, vbd: 0.252002, vbg: 0.00257527, vbn: 0.0024348, vbp: 0.176242, vbz: 0.21843, wdt: 0.000280938, wp: 0.000655523, wps: 4.68231e-05, wrb: 0.000983284 } 27 | prps: { cc: 0.000488234, cd: 0.0210917, fw: 0.00029294, in: 9.76467e-05, jj: 0.240992, jjr: 0.00273411, jjs: 0.0106435, lrb: 0.00087882, nn: 0.441656, nnp: 0.048921, nnps: 0.00058588, nns: 0.195293, pp: 9.76467e-05, ppc: 0.00029294, 
ppd: 0.00790938, ppl: 0.00478469, pps: 9.76467e-05, rb: 0.00546822, rbr: 0.000195293, rbs: 0.00205058, rrb: 0.00029294, vbd: 0.00029294, vbg: 0.0075188, vbn: 0.00712821, vbz: 0.000195293 } 28 | rb: { cc: 0.00916326, cd: 0.0408682, det: 0.0473348, ex: 0.000811603, fw: 5.23615e-05, in: 0.127893, jj: 0.102079, jjr: 0.0129071, jjs: 0.000628338, lrb: 0.000575977, md: 0.0102367, nn: 0.0117552, nnp: 0.00685936, nnps: 0.000261808, nns: 0.00463399, pdt: 0.000654519, pos: 0.000183265, pp: 0.0496125, ppc: 0.0972091, ppd: 0.0123311, ppl: 0.00170175, ppr: 0.0014923, pps: 0.004346, prp: 0.00877055, prps: 0.00235627, rb: 0.0726254, rbr: 0.0077495, rbs: 0.000209446, rp: 0.00034035, rrb: 0.000445073, sym: 2.61808e-05, to: 0.02694, vb: 0.102, vbd: 0.0548749, vbg: 0.0303435, vbn: 0.081108, vbp: 0.0255262, vbz: 0.0383024, wdt: 0.000261808, wp: 0.00120431, wrb: 0.00332496 } 29 | rbr: { cc: 0.0206897, cd: 0.00045977, det: 0.0845977, in: 0.234483, jj: 0.312644, jjr: 0.00045977, lrb: 0.00045977, md: 0.00229885, nn: 0.00505747, nns: 0.00045977, pp: 0.121379, ppc: 0.0616092, ppl: 0.00229885, pps: 0.00689655, prp: 0.00321839, rb: 0.0731035, rbr: 0.00045977, rp: 0.00045977, rrb: 0.00045977, to: 0.0174713, vb: 0.0124138, vbd: 0.00735632, vbg: 0.00551724, vbn: 0.0206897, vbp: 0.00091954, vbz: 0.00321839, wrb: 0.00091954 } 30 | rbs: { det: 0.0036036, in: 0.045045, jj: 0.72973, lrb: 0.0018018, md: 0.0018018, nn: 0.0018018, nns: 0.0036036, pp: 0.00720721, ppc: 0.00720721, ppl: 0.0018018, pps: 0.0036036, prp: 0.0018018, rb: 0.115315, to: 0.00900901, vb: 0.0036036, vbd: 0.0018018, vbg: 0.0018018, vbn: 0.0540541, vbp: 0.0036036, vbz: 0.0018018 } 31 | rp: { cc: 0.0116031, cd: 0.020458, det: 0.211298, in: 0.246107, jj: 0.050687, jjr: 0.00580153, jjs: 0.00244275, lrb: 0.000610687, md: 0.000305344, nn: 0.0476336, nnp: 0.0256489, nns: 0.0500763, pdt: 0.000916031, pp: 0.0622901, ppc: 0.0445802, ppd: 0.00885496, ppl: 0.00580153, ppr: 0.00610687, pps: 0.00366412, prp: 0.000916031, prps: 0.0525191, rb: 
0.0522137, rbr: 0.00396947, rp: 0.000610687, rrb: 0.000305344, to: 0.0531298, vbd: 0.00122137, vbg: 0.0180153, vbn: 0.00183206, vbp: 0.000916031, vbz: 0.000610687, wp: 0.00458015, wrb: 0.00427481 } 32 | rrb: { cc: 0.062759, cd: 0.00947306, det: 0.0396684, ex: 0.000592066, fw: 0.000592066, in: 0.123742, jj: 0.0201303, jjr: 0.00118413, jjs: 0.000592066, lrb: 0.000592066, md: 0.0213144, nn: 0.0568384, nnp: 0.0550622, nnps: 0.000592066, nns: 0.0219065, pp: 0.137951, ppc: 0.168147, ppd: 0.000592066, ppl: 0.0053286, pps: 0.0781528, prp: 0.00947306, prps: 0.00118413, rb: 0.0242747, rbs: 0.000592066, sym: 0.000592066, to: 0.0219065, vb: 0.00828893, vbd: 0.0367081, vbg: 0.0053286, vbn: 0.00651273, vbp: 0.0219065, vbz: 0.0479574, wdt: 0.0053286, wp: 0.0035524, wrb: 0.00118413 } 33 | sym: { cd: 0.0614286, fw: 0.0185714, in: 0.0185714, jj: 0.0471429, nn: 0.0471429, nnp: 0.0471429, pps: 0.604286, rb: 0.0185714, sym: 0.1, vbn: 0.0042857, vbz: 0.0328571 } 34 | to: { cc: 0.000473692, cd: 0.0787422, det: 0.112593, fw: 7.28757e-05, in: 0.00393529, jj: 0.0312272, jjr: 0.0029879, jjs: 0.000255065, lrb: 0.000255065, nn: 0.0318102, nnp: 0.0450736, nnps: 0.000364378, nns: 0.0237939, pdt: 0.000291503, pp: 0.000692319, ppc: 0.000728757, ppd: 0.0441991, ppl: 0.00389885, ppr: 0.000109314, pps: 7.28757e-05, prp: 0.00513774, prps: 0.0133727, rb: 0.00932809, rbr: 0.000947384, rbs: 3.64378e-05, rrb: 3.64378e-05, to: 7.28757e-05, uh: 3.64378e-05, vb: 0.578706, vbd: 0.000109314, vbg: 0.00688675, vbn: 0.00142108, vbp: 3.64378e-05, vbz: 7.28757e-05, wdt: 0.000728757, wp: 0.00102026, wrb: 0.000473692 } 35 | uh: { in: 0.017094, nn: 0.017094, nns: 0.00854701, pp: 0.196581, ppc: 0.529915, ppr: 0.0512821, pps: 0.034188, prp: 0.00854701, prps: 0.00854701, rrb: 0.025641, to: 0.0512821, uh: 0.034188, vb: 0.017094 } 36 | vb: { cc: 0.00924334, cd: 0.0202371, det: 0.223099, ex: 0.000552758, fw: 6.14175e-05, in: 0.112548, jj: 0.0841727, jjr: 0.0105024, jjs: 0.000675593, lrb: 0.00113622, md: 0.000460631, nn: 
0.0619703, nnp: 0.0317221, nnps: 0.00049134, nns: 0.0495025, pdt: 0.00156615, pos: 0.000184253, pp: 0.0251198, ppc: 0.0173812, ppd: 0.00896696, ppl: 0.00568112, ppr: 0.00242599, pps: 0.00251812, prp: 0.0368198, prps: 0.043545, rb: 0.0489498, rbr: 0.00500553, rbs: 0.000276379, rp: 0.0320292, rrb: 0.000460631, to: 0.0424395, uh: 9.21263e-05, vb: 0.00518978, vbd: 0.00138189, vbg: 0.017504, vbn: 0.084664, vbp: 0.000368505, vbz: 0.00153544, wdt: 0.000644884, wp: 0.00365434, wps: 6.14175e-05, wrb: 0.00515907 } 37 | vbd: { cc: 0.00304146, cd: 0.0627501, det: 0.16768, ex: 0.00128061, fw: 2.66795e-05, in: 0.113841, jj: 0.0564004, jjr: 0.00749693, jjs: 0.000800384, lrb: 0.000320154, md: 0.000373513, nn: 0.0329491, nnp: 0.0568806, nnps: 0.000133397, nns: 0.0232912, pdt: 0.000693666, pp: 0.0460488, ppc: 0.0231845, ppd: 0.0156342, ppl: 0.00658983, ppr: 0.000346833, pps: 0.00317486, prp: 0.0606424, prps: 0.0258791, rb: 0.0855877, rbr: 0.00381516, rbs: 0.000373513, rp: 0.0159543, rrb: 0.000106718, to: 0.0569874, uh: 0.000106718, vb: 0.00293474, vbd: 0.00200096, vbg: 0.0234779, vbn: 0.0963663, vbp: 0.000106718, vbz: 0.000320154, wdt: 0.000160077, wp: 0.000586948, wrb: 0.00165413 } 38 | vbg: { cc: 0.00981521, cd: 0.0179306, det: 0.184789, ex: 0.000219334, in: 0.139826, jj: 0.0721062, jjr: 0.00937654, jjs: 0.000658003, lrb: 0.000383835, md: 0.000493502, nn: 0.12694, nnp: 0.0395898, nnps: 0.000658003, nns: 0.0894336, pdt: 0.00109667, pos: 5.48336e-05, pp: 0.0169984, ppc: 0.0129407, ppd: 0.00690903, ppl: 0.00509952, ppr: 0.00208368, pps: 0.00197401, prp: 0.0244558, prps: 0.0302681, rb: 0.0384932, rbr: 0.00438669, rbs: 0.000109667, rp: 0.0269781, rrb: 0.000438669, sym: 5.48336e-05, to: 0.0953008, uh: 0.000164501, vb: 0.000877337, vbd: 0.00307068, vbg: 0.00433185, vbn: 0.0253879, vbp: 0.000548336, vbz: 0.00202884, wdt: 0.000164501, wp: 0.00202884, wrb: 0.00153534 } 39 | vbn: { cc: 0.0132743, cd: 0.0116251, det: 0.0675382, ex: 8.04505e-05, in: 0.363596, jj: 0.0445696, jjr: 0.0033387, 
jjs: 0.000201126, lrb: 0.000402253, md: 0.00116653, nn: 0.0660097, nnp: 0.0261866, nnps: 0.000201126, nns: 0.0381738, pdt: 0.000643604, pos: 0.000120676, pp: 0.0502011, ppc: 0.0331054, ppd: 0.00659694, ppl: 0.00514883, ppr: 0.00217216, pps: 0.00418343, prp: 0.00897023, prps: 0.00993564, rb: 0.0495173, rbr: 0.00337892, rbs: 0.000241352, rp: 0.0212389, rrb: 0.000724055, to: 0.106315, vb: 0.000643604, vbd: 0.00213194, vbg: 0.0203138, vbn: 0.0319791, vbp: 0.00116653, vbz: 0.00168946, wdt: 0.000241352, wp: 0.00076428, wrb: 0.00221239 } 40 | vbp: { cc: 0.00429212, cd: 0.00897444, det: 0.119594, ex: 0.0018209, fw: 6.50322e-05, in: 0.0926058, jj: 0.0856474, jjr: 0.00851922, jjs: 0.00058529, lrb: 0.000650322, md: 0.00169084, nn: 0.0299798, nnp: 0.0186642, nnps: 0.000325161, nns: 0.0325161, pdt: 0.000780386, pos: 0.000130064, pp: 0.0194446, ppc: 0.0195747, ppd: 0.00266632, ppl: 0.00487741, ppr: 0.000455225, pps: 0.00273135, prp: 0.0357027, prps: 0.0108604, rb: 0.164076, rbr: 0.00526761, rbs: 0.000845418, rp: 0.00903947, rrb: 0.000325161, to: 0.0516356, vb: 0.00279638, vbd: 0.0036418, vbg: 0.084867, vbn: 0.165702, vbp: 0.00117058, vbz: 0.00260129, wdt: 0.000260129, wp: 0.00234116, wps: 6.50322e-05, wrb: 0.00221109 } 41 | vbz: { cc: 0.00276149, cd: 0.0202761, det: 0.162928, ex: 0.000718744, fw: 3.78286e-05, in: 0.0886325, jj: 0.073501, jjr: 0.00809533, jjs: 0.000643087, lrb: 0.000302629, md: 0.000794401, nn: 0.0347267, nnp: 0.0600719, nnps: 0.0002648, nns: 0.0160393, pdt: 0.000529601, pp: 0.0305277, ppc: 0.0251182, ppd: 0.00446378, ppl: 0.0099111, ppr: 0.000226972, pps: 0.00419898, prp: 0.0231511, prps: 0.0117269, rb: 0.135616, rbr: 0.00397201, rbs: 0.000870059, rp: 0.00696047, rrb: 0.000189143, to: 0.05122, uh: 3.78286e-05, vb: 0.00321543, vbd: 0.00287498, vbg: 0.0592775, vbn: 0.148515, vbp: 0.000189143, vbz: 0.000945716, wdt: 0.000302629, wp: 0.00223189, wrb: 0.00393418 } 42 | wdt: { cc: 0.000375728, cd: 0.0046966, det: 0.0272403, ex: 0.00169078, in: 0.00901747, jj: 
0.00920534, jjr: 0.000751456, jjs: 0.000751456, lrb: 0.000187864, md: 0.134135, nn: 0.0146534, nnp: 0.0212286, nnps: 0.000187864, nns: 0.0174714, pos: 0.000187864, ppc: 0.00544806, ppd: 0.00093932, ppl: 0.000751456, pps: 0.000375728, prp: 0.0338155, prps: 0.00169078, rb: 0.0479053, rbr: 0.000187864, to: 0.00150291, vb: 0.00112718, vbd: 0.229946, vbn: 0.00093932, vbp: 0.145595, vbz: 0.287995 } 43 | wp: { cd: 0.0017319, det: 0.0439903, in: 0.017319, jj: 0.00623485, jjr: 0.000692761, jjs: 0.000692761, lrb: 0.00034638, ls: 0.00034638, md: 0.0651195, nn: 0.0128161, nnp: 0.0193973, nns: 0.00969865, pdt: 0.00034638, pos: 0.00034638, pp: 0.00138552, ppc: 0.00692761, ppl: 0.000692761, prp: 0.0841704, prps: 0.0051957, rb: 0.0450294, rbr: 0.000692761, rbs: 0.00034638, rrb: 0.00103914, to: 0.00692761, vb: 0.00381018, vbd: 0.271562, vbn: 0.000692761, vbp: 0.164184, vbz: 0.227918, wdt: 0.00034638 } 44 | wps: { cd: 0.0182648, jj: 0.182648, jjr: 0.00456621, jjs: 0.00913242, nn: 0.351598, nnp: 0.0593607, nns: 0.328767, ppd: 0.0182648, ppl: 0.00456621, rb: 0.00456621, vbg: 0.0136986, vbn: 0.00456621 } 45 | wrb: { cc: 0.00342857, cd: 0.00647619, det: 0.252952, ex: 0.00952381, in: 0.00952381, jj: 0.0963809, jjr: 0.00114286, jjs: 0.00152381, lrb: 0.000380952, md: 0.0118095, nn: 0.0609524, nnp: 0.0998095, nnps: 0.00152381, nns: 0.0788571, pdt: 0.00228571, pp: 0.00304762, ppc: 0.0110476, ppd: 0.000380952, ppl: 0.00266667, prp: 0.212571, prps: 0.023619, rb: 0.0388571, rbs: 0.000380952, rp: 0.000380952, rrb: 0.000380952, to: 0.023619, vb: 0.00228571, vbd: 0.00685714, vbg: 0.00990476, vbn: 0.0102857, vbp: 0.00609524, vbz: 0.00990476, wdt: 0.000380952, wp: 0.000380952, wrb: 0.000380952 } 46 | -------------------------------------------------------------------------------- /lib/engtagger.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # -*- coding: utf-8 -*- 3 | 4 | $LOAD_PATH << File.join(File.dirname(__FILE__), 'engtagger') 5 
| require 'rubygems' 6 | require 'kconv' 7 | require 'porter' 8 | 9 | # use hpricot for extracting English text from docs with XML-like tags 10 | begin 11 | require 'hpricot' 12 | rescue LoadError 13 | $no_hpricot = true 14 | end 15 | 16 | # File paths 17 | $lexpath = File.join(File.dirname(__FILE__), 'engtagger') 18 | $word_path = File.join($lexpath, "pos_words.hash") 19 | $tag_path = File.join($lexpath, "pos_tags.hash") 20 | 21 | # for memoization (code snippet from http://eigenclass.org/hiki/bounded-space-memoization) 22 | class Module 23 | def memoize(method) 24 | # alias_method is faster than define_method + old.bind(self).call 25 | alias_method "__memoized__#{method}", method 26 | module_eval <<-EOF 27 | def #{method}(*a, &b) 28 | # assumes the block won't change the result if the args are the same 29 | (@__memoized_#{method}_cache ||= {})[a] ||= __memoized__#{method}(*a, &b) 30 | end 31 | EOF 32 | end 33 | end 34 | 35 | # English part-of-speech tagger class 36 | class EngTagger 37 | 38 | ################# 39 | # Class methods # 40 | ################# 41 | 42 | # Return a class variable that holds probability data 43 | def self.hmm 44 | return @@hmm 45 | end 46 | 47 | # Return a class variable that holds lexical data 48 | def self.lexicon 49 | return @@lexicon 50 | end 51 | 52 | # Return a regexp from a string argument that matches an XML-style pos tag 53 | def self.get_ext(tag = nil) 54 | return nil unless tag 55 | return Regexp.new("<#{tag}>[^<]+\s*") 56 | end 57 | 58 | # Regexps to match XML-style part-of-speech tags 59 | NUM = get_ext('cd') 60 | GER = get_ext('vbg') 61 | ADJ = get_ext('jj[rs]*') 62 | PART = get_ext('vbn') 63 | NN = get_ext('nn[sp]*') 64 | NNP = get_ext('nnp') 65 | PREP = get_ext('in') 66 | DET = get_ext('det') 67 | PAREN = get_ext('[lr]rb') 68 | QUOT = get_ext('ppr') 69 | SEN = get_ext('pp') 70 | WORD = get_ext('\w+') 71 | 72 | # Convert a Treebank-style, abbreviated tag into verbose definitions 73 | def self.explain_tag(tag) 74 | if
TAGS[tag] 75 | return TAGS[tag] 76 | else 77 | return tag 78 | end 79 | end 80 | 81 | # The folloging is to make a hash to convert a pos tag to its definition 82 | # used by the explain_tag method 83 | tags = [ 84 | "CC", "Conjunction, coordinating", 85 | "CD", "Adjective, cardinal number", 86 | "DET", "Determiner", 87 | "EX", "Pronoun, existential there", 88 | "FW", "Foreign words", 89 | "IN", "Preposition / Conjunction", 90 | "JJ", "Adjective", 91 | "JJR", "Adjective, comparative", 92 | "JJS", "Adjective, superlative", 93 | "LS", "Symbol, list item", 94 | "MD", "Verb, modal", 95 | "NN", "Noun", 96 | "NNP", "Noun, proper", 97 | "NNPS", "Noun, proper, plural", 98 | "NNS", "Noun, plural", 99 | "PDT", "Determiner, prequalifier", 100 | "POS", "Possessive", 101 | "PRP", "Determiner, possessive second", 102 | "PRPS", "Determiner, possessive", 103 | "RB", "Adverb", 104 | "RBR", "Adverb, comparative", 105 | "RBS", "Adverb, superlative", 106 | "RP", "Adverb, particle", 107 | "SYM", "Symbol", 108 | "TO", "Preposition", 109 | "UH", "Interjection", 110 | "VB", "Verb, infinitive", 111 | "VBD", "Verb, past tense", 112 | "VBG", "Verb, gerund", 113 | "VBN", "Verb, past/passive participle", 114 | "VBP", "Verb, base present form", 115 | "VBZ", "Verb, present 3SG -s form", 116 | "WDT", "Determiner, question", 117 | "WP", "Pronoun, question", 118 | "WPS", "Determiner, possessive & question", 119 | "WRB", "Adverb, question", 120 | "PP", "Punctuation, sentence ender", 121 | "PPC", "Punctuation, comma", 122 | "PPD", "Punctuation, dollar sign", 123 | "PPL", "Punctuation, quotation mark left", 124 | "PPR", "Punctuation, quotation mark right", 125 | "PPS", "Punctuation, colon, semicolon, elipsis", 126 | "LRB", "Punctuation, left bracket", 127 | "RRB", "Punctuation, right bracket" 128 | ] 129 | tags = tags.collect{|t| t.downcase.gsub(/[\.\,\'\-\s]+/, '_')} 130 | tags = tags.collect{|t| t.gsub(/\&/, "and").gsub(/\//, "or")} 131 | TAGS = Hash[*tags] 132 | 133 | # Hash storing config values: 
134 | # 135 | # * :unknown_word_tag 136 | # => (String) Tag to assign to unknown words 137 | # * :stem 138 | # => (Boolean) Stem single words using Porter module 139 | # * :weight_noun_phrases 140 | # => (Boolean) When returning occurrence counts for a noun phrase, multiply 141 | # the valuethe number of words in the NP. 142 | # * :longest_noun_phrase 143 | # => (Integer) Will ignore noun phrases longer than this threshold. This 144 | # affects only the get_words() and get_nouns() methods. 145 | # * :relax 146 | # => (Boolean) Relax the Hidden Markov Model: this may improve accuracy for 147 | # uncommon words, particularly words used polysemously 148 | # * :tag_lex 149 | # => (String) Name of the YAML file containing a hash of adjacent part of 150 | # speech tags and the probability of each 151 | # * :word_lex 152 | # => (String) Name of the YAML file containing a hash of words and corresponding 153 | # parts of speech 154 | # * :unknown_lex 155 | # => (String) Name of the YAML file containing a hash of tags for unknown 156 | # words and corresponding parts of speech 157 | # * :tag_path 158 | # => (String) Directory path of tag_lex 159 | # * :word_path 160 | # => (String) Directory path of word_lex and unknown_lex 161 | # * :debug 162 | # => (Boolean) Print debug messages 163 | attr_accessor :conf 164 | 165 | ############### 166 | # Constructor # 167 | ############### 168 | 169 | # Take a hash of parameters that override default values. 170 | # See above for details. 
171 | def initialize(params = {}) 172 | @conf = Hash.new 173 | @conf[:unknown_word_tag] = '' 174 | @conf[:stem] = false 175 | @conf[:weight_noun_phrases] = false 176 | @conf[:longest_noun_phrase] = 5 177 | @conf[:relax] = false 178 | @conf[:tag_lex] = 'tags.yml' 179 | @conf[:word_lex] = 'words.yml' 180 | @conf[:unknown_lex] = 'unknown.yml' 181 | @conf[:word_path] = $word_path 182 | @conf[:tag_path] = $tag_path 183 | @conf[:debug] = false 184 | # assuming that we start analyzing from the beginninga new sentence... 185 | @conf[:current_tag] = 'pp' 186 | @conf.merge(params) if params 187 | unless File.exists?(@conf[:word_path]) and File.exists?(@conf[:tag_path]) 188 | print "Couldn't locate POS lexicon, creating a new one" if @conf[:debug] 189 | @@hmm = Hash.new 190 | @@lexicon = Hash.new 191 | else 192 | lexf = File.open(@conf[:word_path], 'r') 193 | @@lexicon = Marshal.load(lexf) 194 | lexf.close 195 | hmmf = File.open(@conf[:tag_path], 'r') 196 | @@hmm = Marshal.load(hmmf) 197 | hmmf.close 198 | end 199 | @@mnp = get_max_noun_regex 200 | end 201 | 202 | ################## 203 | # Public methods # 204 | ################## 205 | 206 | # Examine the string provided and return it fully tagged in XML style 207 | def add_tags(text, verbose = false) 208 | return nil unless valid_text(text) 209 | tagged = [] 210 | words = clean_text(text) 211 | tags = Array.new 212 | words.each do |word| 213 | cleaned_word = clean_word(word) 214 | tag = assign_tag(@conf[:current_tag], cleaned_word) 215 | @conf[:current_tag] = tag = (tag and tag != "") ? tag : 'nn' 216 | tag = EngTagger.explain_tag(tag) if verbose 217 | tagged << '<' + tag + '>' + word + '' 218 | end 219 | reset 220 | return tagged.join(' ') 221 | end 222 | 223 | # Given a text string, return as many nouns and noun phrases as possible. 
# Applies add_tags and involves three stages:
#
#     * Tag the text
#     * Extract all the maximal noun phrases
#     * Recursively extract all noun phrases from the MNPs
#
# Returns false when the text is empty or nil.
def get_words(text)
  return false unless valid_text(text)
  tagged = add_tags(text)
  if(@conf[:longest_noun_phrase] <= 1)
    return get_nouns(tagged)
  else
    return get_noun_phrases(tagged)
  end
end

# Return an easy-on-the-eyes tagged version of a text string.
# Applies add_tags and reformats every <tag>word</tag> as word/TAG.
# Returns nil when the text is empty or nil.
def get_readable(text, verbose = false)
  return nil unless valid_text(text)
  tagged = add_tags(text, verbose)
  tagged.gsub(/<\w+>([^<]+)<\/(\w+)>/) do
    $1 + '/' + $2.upcase
  end
end

# Return an array of sentences (without POS tags) from a text.
# The tagged text is cut after every sentence ender (</pp>), the tags are
# stripped, and the punctuation that tokenization separated is re-attached.
# Returns nil when the text is empty or nil.
def get_sentences(text)
  return nil unless valid_text(text)
  tagged = add_tags(text)
  sentences = Array.new
  tagged.split(/<\/pp>/).each do |line|
    sentence = strip_tags(line)
    next unless sentence # nothing but whitespace in this piece
    # Each substitution works on the result of the previous one; regexp
    # literals are used because "\W" inside a double-quoted string is
    # only the letter W.
    sentence = sentence.gsub(/ ('s?) /) { $1 + ' ' }  # 's goes back on its word
    sentence = sentence.gsub(/ (`+) /) { ' ' + $1 }   # opening quotes join the next word
    sentence = sentence.gsub(/^(`+) /) { $1 }         # ... also at the very start
    sentence = sentence.gsub(/ (\W+) /) { $1 + ' ' }  # punctuation joins the previous word
    sentence = sentence.gsub(/ (\W+)$/) { $1 }        # ... also at the very end
    sentences << sentence
  end
  return sentences
end

# Given a POS-tagged text, this method returns a hash of all proper nouns
# and their occurrence frequencies. The method is greedy and will
# return multi-word phrases, if possible, so it would find ``Linguistic
# Data Consortium'' as a single unit, rather than as three individual
# proper nouns. This method does not stem the found words.
def get_proper_nouns(tagged)
  return nil unless valid_text(tagged)
  nnp = Hash.new(0)
  # Scan runs of adjacent proper nouns so that multi-word names are
  # returned as a single unit.
  tagged.scan(/(?:#{NNP})+/).each do |match|
    n = strip_tags(match)
    next unless n && n.length < 100 # sanity check on word length
    nnp[n] += 1 unless n =~ /\A\s*\z/
  end
  # Now for some fancy resolution stuff...
  nnp.keys.each do |key|
    words = key.split(/\s/)
    # Let's say this is an organization's name --
    # (and it's got at least three words)
    # is there a corresponding acronym in this hash?
    next unless words.length > 2
    # Make a (naive) acronym out of this name: the first letter of every
    # purely alphabetic word (proper nouns are capitalised, so the match
    # has to ignore case).
    acronym = words.map { |word| word[/\A([a-z])[a-z]*\z/i, 1] }.join('')
    # If that acronym has been seen, remove it and add the values to the
    # full name. key? is required: the hash defaults to 0, so a plain
    # lookup is always truthy.
    if nnp.key?(acronym)
      nnp[key] += nnp[acronym]
      nnp.delete(acronym)
    end
  end
  return nnp
end

# Given a POS-tagged text, this method returns all nouns and their
# occurrence frequencies. Nouns are stemmed when @conf[:stem] is set.
# Returns nil when the text is empty or nil.
def get_nouns(tagged)
  return nil unless valid_text(tagged)
  ret = Hash.new(0)
  tagged.scan(NN).each do |match|
    n = stem(strip_tags(match))
    next unless n.length < 100 # sanity check on word length
    ret[n] += 1 unless n =~ /\A\s*\z/
  end
  return ret
end

# Given a POS-tagged text, this method returns only the maximal noun phrases.
# May be called directly, but is also used by get_noun_phrases
# Returns a hash of phrase => occurrence count, or nil for empty text.
def get_max_noun_phrases(tagged)
  return unless valid_text(tagged)
  mn_phrases = tagged.scan(@@mnp).map do |m|
    strip_tags(m)
  end
  ret = Hash.new(0)
  mn_phrases.each do |phrase|
    phrase = stem(phrase) unless phrase =~ /\s/ # stem single words
    ret[phrase] += 1 unless phrase =~ /\A\s*\z/
  end
  return ret
end

# Similar to get_words, but requires a POS-tagged text as an argument.
# Returns a hash of noun phrase => (optionally weighted) occurrence count,
# or nil for empty text.
def get_noun_phrases(tagged)
  return nil unless valid_text(tagged)
  found = Hash.new(0)
  phrase_ext = /(?:#{PREP}|#{DET}|#{NUM})+/xo
  scanned = tagged.scan(@@mnp)
  # Find MNPs in the text, one sentence at a time
  # Record and split if the phrase is extended by a (?:PREP|DET|NUM)
  mn_phrases = []
  scanned.each do |m|
    found[m] += 1 if phrase_ext =~ m
    mn_phrases += m.split(phrase_ext)
  end
  mn_phrases.each do |mnp|
    # Split the phrase into an array of words, and create a loop for each word,
    # shortening the phrase by removing the word in the first position.
    # Record the phrase and any single nouns that are found
    words = mnp.split
    words.length.times do
      found[words.join(' ')] += 1 if words.length > 1
      w = words.shift
      found[w] += 1 if w =~ /#{NN}/
    end
  end
  ret = Hash.new(0)
  found.keys.each do |f|
    k = strip_tags(f)
    v = found[f]
    # We weight by the word count to favor long noun phrases
    space_count = k.scan(/\s+/)
    word_count = space_count.length + 1
    # Throttle MNPs if necessary
    next if word_count > @conf[:longest_noun_phrase]
    k = stem(k) unless word_count > 1 # stem single words
    multiplier = 1
    multiplier = word_count if @conf[:weight_noun_phrases]
    ret[k] += multiplier * v
  end
  return ret
end

# Reads some included corpus data and saves it in a stored hash on the
# local file system.
# NOTE(review): the original comment says this is called automatically when
# the stored lexicon cannot be found; no such call is visible in this part
# of the file -- confirm before relying on it.
def install
  puts "Creating part-of-speech lexicon" if @conf[:debug]
  load_tags(@conf[:tag_lex])
  load_words(@conf[:word_lex])
  load_words(@conf[:unknown_lex])
  # Marshal output is binary data, so write it in binary mode
  File.open(@conf[:word_path], 'wb') do |f|
    Marshal.dump(@@lexicon, f)
  end
  File.open(@conf[:tag_path], 'wb') do |f|
    Marshal.dump(@@hmm, f)
  end
end

###################
# Private methods #
###################

# NOTE(review): this is a bare Symbol literal, not a call to `private`, so it
# has no effect and every method below stays publicly callable. It is left
# unchanged because switching to `private` would alter the public interface.
:private

# Downcase the first letter of word. An empty string is returned as is.
def lcfirst(word)
  return word if word.empty?
  word[0, 1].downcase + word[1..-1]
end

# Upcase the first letter of word. An empty string is returned as is.
def ucfirst(word)
  return word if word.empty?
  word[0, 1].upcase + word[1..-1]
end

# Return the word stem as given by Stemmable module. This can be
# turned off with the class parameter @conf[:stem] => false.
def stem(word)
  return word unless @conf[:stem]
  return word.stem
end

# This method will reset the preceding tag to a sentence ender (PP).
# This prepares the first word of a new sentence to be tagged correctly.
def reset
  @conf[:current_tag] = 'pp'
end

# Check whether the text is a valid string: returns false for nil/false and
# for text that is empty or made of whitespace only, true otherwise.
def valid_text(text)
  unless text
    # there's nothing to parse
    puts "method call on uninitialized variable" if @conf[:debug]
    return false
  end
  # an empty (or all-whitespace) string leaves nothing to parse either
  !(/\A\s*\z/ =~ text)
end

# Return a text string with the part-of-speech tags removed, runs of
# whitespace collapsed to single spaces and the ends trimmed. The result is
# downcased when downcase is set. Returns nil for empty or nil input.
def strip_tags(tagged, downcase = false)
  return nil unless valid_text(tagged)
  text = tagged.gsub(/<[^>]+>/m, "")
  text = text.gsub(/\s+/m, " ")
  text = text.gsub(/\A\s*/, "")
  text = text.gsub(/\s*\z/, "")
  downcase ? text.downcase : text
end

# Strip the provided text of HTML-style tags and separate off any punctuation
# in preparation for tagging. Returns the array of tokens, or false when the
# text is empty or nil.
def clean_text(text)
  return false unless valid_text(text)
  text = text.toutf8
  if $no_hpricot
    cleaned_text = text
  else
    # Strip out any markup and convert entities to their proper form
    cleaned_text = Hpricot(text).inner_text
  end
  tokenized = []
  # Tokenize the text (splitting on punctuation as you go)
  cleaned_text.split(/\s+/).each do |line|
    tokenized += split_punct(line)
  end
  return split_sentences(tokenized)
end

# This handles all of the trailing periods, keeping those that
# belong on abbreviations and removing those that seem to be
# at the end of sentences.
# This method makes some assumptions
# about the use of capitalization in the incoming text.
# Takes the array of tokens and returns a new array in which sentence-final
# periods have become tokens of their own.
def split_sentences(array)
  words = Array.new
  array.each_with_index do |token, i|
    following = array[i + 1]
    if following && following =~ /[A-Z\W]/ && token =~ /\A(.+)\.\z/
      w = $1
      # Don't separate the period off words that
      # meet any of the following conditions:
      #
      # 1. It is defined in one of the lists of known abbreviations
      # 2. It is only one letter long: Alfred E. Sloan
      # 3. It has a repeating letter-dot: U.S.A. or J.C. Penney
      unless SENTENCE_ABBREVIATIONS[w.downcase] || w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
        words << w
        words << '.'
        next
      end
    end
    words << token
  end
  # If the final word ends in a period..
  if words[-1] && words[-1] =~ /\A(.*\w)\.\z/
    words[-1] = $1
    words.push '.'
  end
  return words
end

# Abbreviations (downcased, without the trailing period) that keep their
# period in split_sentences. Built once instead of on every call.
SENTENCE_ABBREVIATIONS = [
  # people
  %w(jr mr ms mrs dr prof esq sr sen sens rep reps gov attys attys
     supt det mssrs rev),
  # army
  %w(col gen lt cmdr adm capt sgt cpl maj brig),
  # institutions
  %w(dept univ assn bros ph.d),
  # places
  %w(arc al ave blvd bld cl ct cres exp expy dist mt mtn ft fy fwy
     hwy hway la pde pd plz pl rd st tce),
  # companies
  %w(mfg inc ltd co corp),
  # states
  %w(ala ariz ark cal calif colo col conn del fed fla ga ida id ill
     ind ia kans kan ken ky la me md is mass mich minn miss mo mont
     neb nebr nev mex okla ok ore penna penn pa dak tenn tex ut vt
     va wash wis wisc wy wyo usafa alta man ont que sask yuk),
  # months
  %w(jan feb mar apr may jun jul aug sep sept oct nov dec),
  # miscellaneous
  %w(vs etc no esp)
].flatten.each_with_object({}) { |abbreviation, table| table[abbreviation] = true }.freeze

# Separate punctuation from words, where appropriate. This leaves trailing
# periods in place to be dealt with later.
# Called by the clean_text method.
# Takes one whitespace-free chunk of text and returns an array of tokens.
def split_punct(text)
  # A chunk made only of word characters needs no splitting
  return [text] if text =~ /\A\w+\z/

  # Sanity check: long runs of non-word characters collapse to one space
  chunk = text.gsub(/\W{10,}/, " ")

  # Quotes are replaced by placeholder characters
  chunk = chunk.gsub(/`(?!`)(?=.*\w)/, "∫ ")   # single backquote followed by text
  chunk = chunk.gsub(/"(?=.*\w)/, " ∬ ")       # double quote followed by text
  chunk = chunk.gsub(/(\W|^)'(?=.*\w)/) do     # apostrophe opening a word
    lead = Regexp.last_match(1)
    lead ? lead + " ∫ " : " ∫ "
  end
  chunk = chunk.gsub(/"/, " ∯ ")               # every remaining double quote
  chunk = chunk.gsub(/(\w|\D)'(?!')(?=\W|$)/) do # apostrophe closing a word
    Regexp.last_match(1) + " ∮ "
  end

  # All other punctuation
  chunk = chunk.gsub(/--+/, " - ")             # runs of dashes become one separate dash
  chunk = chunk.gsub(/,(?!\d)/, " , ")         # commas, except those followed by a digit
  if chunk.include? ":"
    before, _separator, after = chunk.to_s.partition(':')
    # a colon with a digit on both sides (as in 10:30) is left alone
    between_digits = (/\A\d+/ === after[0]) && (/\A\d+/ === before[-1])
    chunk = chunk.gsub(/:/, " :") unless between_digits
  end
  chunk = chunk.gsub(/(\.\.\.+)/) { " " + Regexp.last_match(1) + " " }           # ellipses
  chunk = chunk.gsub(/([\(\[\{\}\]\)])/) { " " + Regexp.last_match(1) + " " }    # brackets
  chunk = chunk.gsub(/([\!\?#\$%;~|])/) { " " + Regexp.last_match(1) + " " }     # other ``standard'' punctuation

  # English-specific contractions: the apostrophe is swapped for a
  # placeholder character, so the contraction stays a single token
  chunk = chunk.gsub(/([A-Za-z])'([dms])\b/) do                                  # 'd 'm 's
    Regexp.last_match(1) + "ƪ" + Regexp.last_match(2)
  end
  chunk = chunk.gsub(/n't\b/, "nƪt")                                             # n't
  chunk = chunk.gsub(/'(ve|ll|re)\b/) { "ƪ" + Regexp.last_match(1) }             # 've, 'll, 're

  chunk.split(' ')
end

# Given a preceding tag, assign a tag word. Called by the add_tags method.
# This method is a modified version of the Viterbi algorithm for part-of-speech tagging
# Returns the best tag as a string; the string is empty when no tag fits.
def assign_tag(prev_tag, word)
  if word == "-unknown-"
    # classify unknown words accordingly
    return @conf[:unknown_word_tag]
  elsif word == "-sym-"
    # If this is a symbol, tag it as a symbol
    return "sym"
  end
  # An entry missing from either table is treated as "no data" instead of
  # raising NoMethodError on nil (both tables are empty when the stored
  # lexicon could not be found).
  word_tags = @@lexicon[word] || {}
  transitions = @@hmm[prev_tag] || {}

  # TAG THE TEXT: What follows is a modified version of the Viterbi algorithm
  # which is used in most POS taggers
  best_so_far = 0
  best_tag = ""
  transitions.each do |tag, transition|
    pw = word_tags[tag]
    unless pw
      # With @conf[:relax] set, this method
      # will also include any `open classes' of POS tags
      next unless @conf[:relax] and tag =~ /\A(?:jj|nn|rb|vb)/
      pw = 0
    end

    # Bayesian logic:
    # P =  P( tag | prev_tag ) * P( tag | word )
    probability = transition * (pw + 1)
    # Set the tag with maximal probability
    if probability > best_so_far
      best_so_far = probability
      best_tag = tag
    end
  end
  return best_tag
end

# This method determines whether a word should be considered in its
# lower or upper case form. This is useful in considering proper nouns
# and words that begin sentences. Called by add_tags.
def clean_word(word)
  lcf = lcfirst(word)
  if @@lexicon[word]
    # seen this word as it appears (lower or upper case)
    return word
  elsif @@lexicon[lcf]
    # seen this word only as lower case
    return lcf
  else
    # never seen this word. guess.
    return classify_unknown_word(word)
  end
end

# This changes any word not appearing in the lexicon to identifiable
# classes of words handled by a simple unknown word classification
# metric. Called by the clean_word method.
def classify_unknown_word(word)
  if /[\(\{\[]/ =~ word # Left brackets
    classified = "*LRB*"
  elsif /[\)\}\]]/ =~ word # Right brackets
    classified = "*RRB*"
  elsif /\A-?(?:\d+(?:\.\d*)?|\.\d+)\z/ =~ word # Floating point number
    # anchored at both ends: without \A every word that merely ends in a
    # digit would be classified as a number
    classified = "*NUM*"
  elsif /\A\d+[\d\/:-]+\d\z/ =~ word # Other number constructs
    classified = "*NUM*"
  elsif /\A-?\d+\w+\z/ =~ word # Ordinal number
    classified = "*ORD*"
  elsif /\A[A-Z][A-Z\.-]*\z/ =~ word # Abbreviation (all caps)
    classified = "-abr-"
  elsif /\w-\w/ =~ word # Hyphenated word
    h_suffix = word[/-([^-]+)\z/, 1]
    if h_suffix and (@@lexicon[h_suffix] and @@lexicon[h_suffix]['jj'])
      # last part of this is defined as an adjective
      classified = "-hyp-adj-"
    else
      # last part of this is not defined as an adjective
      classified = "-hyp-"
    end
  elsif /\A\W+\z/ =~ word
    classified = "-sym-" # Symbol
  elsif word == ucfirst(word)
    classified = "-cap-" # Capitalized word
  elsif /ing\z/ =~ word
    classified = "-ing-" # Ends in 'ing'
  elsif /s\z/ =~ word
    classified = "-s-" # Ends in 's'
  elsif /tion\z/ =~ word
    classified = "-tion-" # Ends in 'tion'
  elsif /ly\z/ =~ word
    classified = "-ly-" # Ends in 'ly'
  elsif /ed\z/ =~ word
    classified = "-ed-" # Ends in 'ed'
  else
    classified = "-unknown-" # Completely unknown
  end
  return classified
end

# This returns a compiled regexp for extracting maximal noun phrases
# from a POS-tagged text.
def get_max_noun_regex
  regex = /
    # optional number, gerund - adjective -participle
    (?:#{NUM})?(?:#{GER}|#{ADJ}|#{PART})*
    # Followed by one or more nouns
    (?:#{NN})+
    (?:
      # Optional preposition, determinant, cardinal
      (?:#{PREP})*(?:#{DET})?(?:#{NUM})?
      # Optional gerund-adjective -participle
      (?:#{GER}|#{ADJ}|#{PART})*
      # one or more nouns
      (?:#{NN})+
    )*
  /xo
  return regex
end

# Load the 2-grams into a hash from YAML data: This is a naive (but fast)
# YAML data parser. It will load a YAML document with a collection of key:
# value entries ( {pos tag}: {probability} ) mapped onto single keys ( {tag} ).
# Each map is expected to be on a single line; i.e., det: { jj: 0.2, nn: 0.5, vb: 0.0002 }
# Lines and items that do not have that shape are skipped.
def load_tags(lexicon)
  path = File.join($lexpath, lexicon)
  # File.foreach closes the file even when parsing raises
  File.foreach(path) do |line|
    next unless /\A"?([^{"]+)"?: \{ (.*) \}/ =~ line
    key, data = $1, $2
    pairs = {}
    data.split(/,\s+/).each do |item|
      next unless /([^:]+):\s*(.+)/ =~ item
      pairs[$1] = $2.to_f
    end
    @@hmm[key] = pairs
  end
end

# Load the 2-grams into a hash from YAML data: This is a naive (but fast)
# YAML data parser. It will load a YAML document with a collection of key:
# value entries ( {pos tag}: {count} ) mapped onto single keys ( {a word} ).
710 | # Each map is expected to be on a single line; i.e., key: { jj: 103, nn: 34, vb: 1 } 711 | def load_words(lexicon) 712 | path = File.join($lexpath, lexicon) 713 | fh = File.open(path, 'r') 714 | while line = fh.gets 715 | /\A"?([^{"]+)"?: \{ (.*) \}/ =~ line 716 | next unless $1 and $2 717 | key, data = $1, $2 718 | tags = Hash.new 719 | items = data.split(/,\s+/) 720 | pairs = {} 721 | items.each do |i| 722 | /([^:]+):\s*(.+)/ =~ i 723 | pairs[$1] = $2.to_f 724 | end 725 | @@lexicon[key] = pairs 726 | end 727 | fh.close 728 | end 729 | 730 | #memoize the stem and assign_tag methods 731 | memoize("stem") 732 | memoize("assign_tag") 733 | end 734 | 735 | -------------------------------------------------------------------------------- /lib/engtagger/pos_tags.hash: -------------------------------------------------------------------------------- 1 | {1"pp{/I"cc:ETf0.0525182I"cd;Tf0.00978944I"det;Tf 0.205127I"ex;Tf0.00404302I"fw;Tf0.000164183I"in;Tf 0.119587I"jj;Tf0.0381932I"jjr;Tf0.00164183I"jjs;Tf0.00217543I"lrb;Tf0.00441243I"ls;Tf0.000759348I"md;Tf0.000677256I"nn;Tf0.0371465I"nnp;Tf 0.185178I" nnps;Tf0.00188811I"nns;Tf0.0392193I"pdt;Tf0.000656734I"pp;Tf0.00014366I"ppc;Tf6.15688e-5I"ppd;Tf0.00014366I"ppl;Tf0.0753602I"ppr;Tf0.0597628I"pps;Tf0.00285269I"prp;Tf0.0552888I" prps;Tf0.00722407I"rb;Tf0.0521693I"rbr;Tf0.00186759I"rbs;Tf0.000513073I"rrb;Tf0.00527439I"sym;Tf0.000779871I"to;Tf0.00316053I"uh;Tf0.000595165I"vb;Tf0.00303739I"vbd;Tf0.000718302I"vbg;Tf0.0111439I"vbn;Tf0.00547962I"vbp;Tf0.00034889I"vbz;Tf0.00135451I"wdt;Tf0.000636211I"wp;Tf0.00289373I"wps;Tf2.05229e-5I"wrb;Tf0.00599269"pdt{ I"det;Tf 0.913832I"jj;Tf0.00226757I"nnp;Tf0.00226757I" prps;Tf0.0816327"nn{0I"cc;Tf0.0397962I"cd;Tf0.00599719I"det;Tf0.00682082I"ex;Tf0.000109816I"fw;Tf6.10091e-5I"in;Tf 0.247752I"jj;Tf0.0087182I"jjr;Tf0.00111037I"jjs;Tf5.49082e-5I"lrb;Tf0.00157403I"ls;Tf6.10091e-6I"md;Tf0.0175218I"nn;Tf 0.122201I"nnp;Tf0.00971265I" 
nnps;Tf7.93118e-5I"nns;Tf0.0785187I"pdt;Tf1.22018e-5I"pos;Tf0.0216765I"pp;Tf 0.10859I"ppc;Tf 0.115027I"ppd;Tf0.000256238I"ppl;Tf0.00240986I"ppr;Tf0.00516137I"pps;Tf0.0116832I"prp;Tf0.00430724I" prps;Tf0.000274541I"rb;Tf0.0177536I"rbr;Tf0.00253188I"rbs;Tf 6.711e-5I"rp;Tf0.000585687I"rrb;Tf0.00173876I"sym;Tf4.88073e-5I"to;Tf0.0394485I"vb;Tf0.00140321I"vbd;Tf0.0485022I"vbg;Tf0.00755293I"vbn;Tf0.0103593I"vbp;Tf0.0040022I"vbz;Tf0.0437618I"wdt;Tf0.00787017I"wp;Tf0.00240376I"wps;Tf0.000170825I"wrb;Tf0.00236715"jj{.I"cc;Tf0.0172469I"cd;Tf0.0161175I"det;Tf0.00358756I"ex;Tf5.31491e-5I"fw;Tf0.000132873I"in;Tf0.0560723I"jj;Tf0.0733723I"jjr;Tf0.000704225I"jjs;Tf0.000345469I"lrb;Tf0.000651076I"md;Tf0.000398618I"nn;Tf 0.44992I"nnp;Tf0.0368057I" nnps;Tf0.00159447I"nns;Tf 0.23282I"pdt;Tf2.65745e-5I"pos;Tf0.000172735I"pp;Tf0.0243689I"ppc;Tf0.0293649I"ppd;Tf0.00265745I"ppl;Tf0.0020861I"ppr;Tf0.00478342I"pps;Tf0.00309593I"prp;Tf0.00139516I" prps;Tf0.000265745I"rb;Tf0.00356099I"rbr;Tf0.000225884I"rbs;Tf9.30109e-5I"rp;Tf0.000106298I"rrb;Tf0.000558065I"to;Tf0.0274382I"uh;Tf1.32873e-5I"vb;Tf0.000106298I"vbd;Tf0.00132873I"vbg;Tf0.00324209I"vbn;Tf0.00191337I"vbp;Tf0.000983258I"vbz;Tf0.00139516I"wdt;Tf0.000119585I"wp;Tf0.000239171I"wrb;Tf0.000637789"cc{-I"cc;Tf0.000237618I"cd;Tf0.0439594I"det;Tf 0.113785I"ex;Tf0.00349638I"fw;Tf0.000101836I"in;Tf0.0513256I"jj;Tf 0.108829I"jjr;Tf0.0102855I"jjs;Tf0.00207067I"lrb;Tf0.000339455I"ls;Tf6.7891e-5I"md;Tf0.0106928I"nn;Tf 0.118639I"nnp;Tf 0.161682I" nnps;Tf0.00332666I"nns;Tf0.0710139I"pdt;Tf0.000577073I"ppc;Tf0.00763773I"ppd;Tf0.0195865I"ppl;Tf0.00590651I"pps;Tf0.000305509I"prp;Tf0.0376116I" prps;Tf0.0172104I"rb;Tf0.0518687I"rbr;Tf0.00264775I"rbs;Tf0.000882583I"rp;Tf0.000101836I"rrb;Tf6.7891e-5I"to;Tf0.00740012I"uh;Tf6.7891e-5I"vb;Tf0.0327574I"vbd;Tf0.0380189I"vbg;Tf0.0210462I"vbn;Tf0.0144268I"vbp;Tf 0.011813I"vbz;Tf0.0227435I"wdt;Tf0.00105231I"wp;Tf0.00213857I"wps;Tf0.000237618I"wrb;Tf0.00403951"vb{/I"cc;Tf0.00924334I"cd;Tf0.0202371I"det;Tf 
0.223099I"ex;Tf0.000552758I"fw;Tf6.14175e-5I"in;Tf 0.112548I"jj;Tf0.0841727I"jjr;Tf0.0105024I"jjs;Tf0.000675593I"lrb;Tf0.00113622I"md;Tf0.000460631I"nn;Tf0.0619703I"nnp;Tf0.0317221I" nnps;Tf0.00049134I"nns;Tf0.0495025I"pdt;Tf0.00156615I"pos;Tf0.000184253I"pp;Tf0.0251198I"ppc;Tf0.0173812I"ppd;Tf0.00896696I"ppl;Tf0.00568112I"ppr;Tf0.00242599I"pps;Tf0.00251812I"prp;Tf0.0368198I" prps;Tf 0.043545I"rb;Tf0.0489498I"rbr;Tf0.00500553I"rbs;Tf0.000276379I"rp;Tf0.0320292I"rrb;Tf0.000460631I"to;Tf0.0424395I"uh;Tf9.21263e-5I"vb;Tf0.00518978I"vbd;Tf0.00138189I"vbg;Tf 0.017504I"vbn;Tf 0.084664I"vbp;Tf0.000368505I"vbz;Tf0.00153544I"wdt;Tf0.000644884I"wp;Tf0.00365434I"wps;Tf6.14175e-5I"wrb;Tf0.00515907"ppl{'I"cc;Tf0.0184726I"cd;Tf0.00777202I"det;Tf 0.147894I"ex;Tf0.0210633I"fw;Tf0.0032665I"in;Tf0.0637531I"jj;Tf 0.113877I"jjr;Tf0.0032665I"jjs;Tf0.0032665I"lrb;Tf0.000225276I"md;Tf0.0117144I"nn;Tf0.0839153I"nnp;Tf0.0762559I" nnps;Tf0.00168957I"nns;Tf0.0355936I"pdt;Tf0.000675828I"ppd;Tf0.000225276I"ppl;Tf0.000337914I"pps;Tf0.000675828I"prp;Tf 0.215026I" prps;Tf 0.010588I"rb;Tf0.0527146I"rbr;Tf0.00168957I"to;Tf0.00518135I"uh;Tf0.00337914I"vb;Tf0.0259067I"vbd;Tf0.0114891I"vbg;Tf0.0149809I"vbn;Tf0.0111512I"vbp;Tf0.0117144I"vbz;Tf0.0198243I"wdt;Tf0.00146429I"wp;Tf0.00923631I"wrb;Tf0.0117144"md{#I"cc;Tf0.00100025I"cd;Tf0.000166708I"det;Tf0.00408435I"in;Tf0.00175044I"jj;Tf0.000500125I"jjr;Tf8.33542e-5I"jjs;Tf8.33542e-5I"lrb;Tf0.000166708I"md;Tf0.000166708I"nn;Tf0.000833542I"nnp;Tf0.000916896I"nns;Tf0.000666833I"pp;Tf0.0020005I"ppc;Tf0.00325081I"ppl;Tf0.00483454I"ppr;Tf0.000166708I"pps;Tf0.000750188I"prp;Tf0.00500125I" prps;Tf0.000250063I"rb;Tf 0.169459I"rbr;Tf0.00125031I"rbs;Tf0.000166708I"rrb;Tf0.000166708I"to;Tf0.0030841I"vb;Tf 0.797699I"vbd;Tf0.000500125I"vbg;Tf8.33542e-5I"vbn;Tf0.000583479I"vbp;Tf0.000250063I"vbz;Tf8.33542e-5"cd{,I"cc;Tf0.0167197I"cd;Tf 
0.198722I"det;Tf0.0292094I"fw;Tf4.45266e-5I"in;Tf0.0892758I"jj;Tf0.0370906I"jjr;Tf0.00211501I"jjs;Tf0.000779215I"lrb;Tf0.00189238I"md;Tf0.00211501I"nn;Tf 0.205067I"nnp;Tf0.0134693I" nnps;Tf0.000868268I"nns;Tf 0.156511I"pos;Tf0.000734688I"pp;Tf0.0724002I"ppc;Tf0.0968453I"ppd;Tf8.90531e-5I"ppl;Tf0.000690162I"ppr;Tf0.000489792I"pps;Tf0.00761404I"prp;Tf0.000734688I" prps;Tf8.90531e-5I"rb;Tf0.00429681I"rbr;Tf0.00020037I"rbs;Tf0.00020037I"rrb;Tf0.00866042I"sym;Tf0.00013358I"to;Tf0.0362892I"vb;Tf4.45266e-5I"vbd;Tf0.00616693I"vbg;Tf0.00169201I"vbn;Tf0.00311686I"vbp;Tf0.00129127I"vbz;Tf0.00262707I"wdt;Tf0.000756952I"wp;Tf0.000244896I"wps;Tf2.22633e-5I"wrb;Tf0.000690162"vbn{,I"cc;Tf0.0132743I"cd;Tf0.0116251I"det;Tf0.0675382I"ex;Tf8.04505e-5I"in;Tf 0.363596I"jj;Tf0.0445696I"jjr;Tf0.0033387I"jjs;Tf0.000201126I"lrb;Tf0.000402253I"md;Tf0.00116653I"nn;Tf0.0660097I"nnp;Tf0.0261866I" nnps;Tf0.000201126I"nns;Tf0.0381738I"pdt;Tf0.000643604I"pos;Tf0.000120676I"pp;Tf0.0502011I"ppc;Tf0.0331054I"ppd;Tf0.00659694I"ppl;Tf0.00514883I"ppr;Tf0.00217216I"pps;Tf0.00418343I"prp;Tf0.00897023I" prps;Tf0.00993564I"rb;Tf0.0495173I"rbr;Tf0.00337892I"rbs;Tf0.000241352I"rp;Tf0.0212389I"rrb;Tf0.000724055I"to;Tf 0.106315I"vb;Tf0.000643604I"vbd;Tf0.00213194I"vbg;Tf0.0203138I"vbn;Tf0.0319791I"vbp;Tf0.00116653I"vbz;Tf0.00168946I"wdt;Tf0.000241352I"wp;Tf0.00076428I"wrb;Tf0.00221239"vbd{-I"cc;Tf0.00304146I"cd;Tf0.0627501I"det;Tf 0.16768I"ex;Tf0.00128061I"fw;Tf2.66795e-5I"in;Tf 0.113841I"jj;Tf0.0564004I"jjr;Tf0.00749693I"jjs;Tf0.000800384I"lrb;Tf0.000320154I"md;Tf0.000373513I"nn;Tf0.0329491I"nnp;Tf0.0568806I" nnps;Tf0.000133397I"nns;Tf0.0232912I"pdt;Tf0.000693666I"pp;Tf0.0460488I"ppc;Tf0.0231845I"ppd;Tf0.0156342I"ppl;Tf0.00658983I"ppr;Tf0.000346833I"pps;Tf0.00317486I"prp;Tf0.0606424I" 
prps;Tf0.0258791I"rb;Tf0.0855877I"rbr;Tf0.00381516I"rbs;Tf0.000373513I"rp;Tf0.0159543I"rrb;Tf0.000106718I"to;Tf0.0569874I"uh;Tf0.000106718I"vb;Tf0.00293474I"vbd;Tf0.00200096I"vbg;Tf0.0234779I"vbn;Tf0.0963663I"vbp;Tf0.000106718I"vbz;Tf0.000320154I"wdt;Tf0.000160077I"wp;Tf0.000586948I"wrb;Tf0.00165413"rrb{(I"cc;Tf 0.062759I"cd;Tf0.00947306I"det;Tf0.0396684I"ex;Tf0.000592066I"fw;Tf0.000592066I"in;Tf 0.123742I"jj;Tf0.0201303I"jjr;Tf0.00118413I"jjs;Tf0.000592066I"lrb;Tf0.000592066I"md;Tf0.0213144I"nn;Tf0.0568384I"nnp;Tf0.0550622I" nnps;Tf0.000592066I"nns;Tf0.0219065I"pp;Tf 0.137951I"ppc;Tf 0.168147I"ppd;Tf0.000592066I"ppl;Tf0.0053286I"pps;Tf0.0781528I"prp;Tf0.00947306I" prps;Tf0.00118413I"rb;Tf0.0242747I"rbs;Tf0.000592066I"sym;Tf0.000592066I"to;Tf0.0219065I"vb;Tf0.00828893I"vbd;Tf0.0367081I"vbg;Tf0.0053286I"vbn;Tf0.00651273I"vbp;Tf0.0219065I"vbz;Tf0.0479574I"wdt;Tf0.0053286I"wp;Tf0.0035524I"wrb;Tf0.00118413"rbr{ I"cc;Tf0.0206897I"cd;Tf0.00045977I"det;Tf0.0845977I"in;Tf 0.234483I"jj;Tf 0.312644I"jjr;Tf0.00045977I"lrb;Tf0.00045977I"md;Tf0.00229885I"nn;Tf0.00505747I"nns;Tf0.00045977I"pp;Tf 0.121379I"ppc;Tf0.0616092I"ppl;Tf0.00229885I"pps;Tf0.00689655I"prp;Tf0.00321839I"rb;Tf0.0731035I"rbr;Tf0.00045977I"rp;Tf0.00045977I"rrb;Tf0.00045977I"to;Tf0.0174713I"vb;Tf0.0124138I"vbd;Tf0.00735632I"vbg;Tf0.00551724I"vbn;Tf0.0206897I"vbp;Tf0.00091954I"vbz;Tf0.00321839I"wrb;Tf0.00091954"wp{#I"cd;Tf0.0017319I"det;Tf0.0439903I"in;Tf 0.017319I"jj;Tf0.00623485I"jjr;Tf0.000692761I"jjs;Tf0.000692761I"lrb;Tf0.00034638I"ls;Tf0.00034638I"md;Tf0.0651195I"nn;Tf0.0128161I"nnp;Tf0.0193973I"nns;Tf0.00969865I"pdt;Tf0.00034638I"pos;Tf0.00034638I"pp;Tf0.00138552I"ppc;Tf0.00692761I"ppl;Tf0.000692761I"prp;Tf0.0841704I" prps;Tf0.0051957I"rb;Tf0.0450294I"rbr;Tf0.000692761I"rbs;Tf0.00034638I"rrb;Tf0.00103914I"to;Tf0.00692761I"vb;Tf0.00381018I"vbd;Tf 0.271562I"vbn;Tf0.000692761I"vbp;Tf 0.164184I"vbz;Tf 0.227918I"wdt;Tf0.00034638"vbz{-I"cc;Tf0.00276149I"cd;Tf0.0202761I"det;Tf 
0.162928I"ex;Tf0.000718744I"fw;Tf3.78286e-5I"in;Tf0.0886325I"jj;Tf 0.073501I"jjr;Tf0.00809533I"jjs;Tf0.000643087I"lrb;Tf0.000302629I"md;Tf0.000794401I"nn;Tf0.0347267I"nnp;Tf0.0600719I" nnps;Tf0.0002648I"nns;Tf0.0160393I"pdt;Tf0.000529601I"pp;Tf0.0305277I"ppc;Tf0.0251182I"ppd;Tf0.00446378I"ppl;Tf0.0099111I"ppr;Tf0.000226972I"pps;Tf0.00419898I"prp;Tf0.0231511I" prps;Tf0.0117269I"rb;Tf 0.135616I"rbr;Tf0.00397201I"rbs;Tf0.000870059I"rp;Tf0.00696047I"rrb;Tf0.000189143I"to;Tf 0.05122I"uh;Tf3.78286e-5I"vb;Tf0.00321543I"vbd;Tf0.00287498I"vbg;Tf0.0592775I"vbn;Tf 0.148515I"vbp;Tf0.000189143I"vbz;Tf0.000945716I"wdt;Tf0.000302629I"wp;Tf0.00223189I"wrb;Tf0.00393418"rbs{I"det;Tf0.0036036I"in;Tf 0.045045I"jj;Tf 0.72973I"lrb;Tf0.0018018I"md;Tf0.0018018I"nn;Tf0.0018018I"nns;Tf0.0036036I"pp;Tf0.00720721I"ppc;Tf0.00720721I"ppl;Tf0.0018018I"pps;Tf0.0036036I"prp;Tf0.0018018I"rb;Tf 0.115315I"to;Tf0.00900901I"vb;Tf0.0036036I"vbd;Tf0.0018018I"vbg;Tf0.0018018I"vbn;Tf0.0540541I"vbp;Tf0.0036036I"vbz;Tf0.0018018"prp{.I"cc;Tf0.00781945I"cd;Tf0.000936461I"det;Tf0.0118931I"ex;Tf0.000140469I"fw;Tf4.68231e-5I"in;Tf0.0351641I"jj;Tf0.0077258I"jjr;Tf0.00149834I"jjs;Tf9.36461e-5I"lrb;Tf0.000936461I"md;Tf 0.123566I"nn;Tf0.00280938I"nnp;Tf0.000936461I" nnps;Tf4.68231e-5I"nns;Tf0.00103011I"pdt;Tf9.36461e-5I"pos;Tf0.00112375I"pp;Tf0.0305755I"ppc;Tf 0.021164I"ppd;Tf0.000234115I"ppl;Tf0.00355855I"ppr;Tf0.000421407I"pps;Tf0.00323079I"prp;Tf0.00163881I" prps;Tf0.000140469I"rb;Tf0.0536592I"rbr;Tf0.00145151I"rbs;Tf0.000234115I"rp;Tf0.00421407I"rrb;Tf0.000468231I"to;Tf0.0169968I"vb;Tf0.0125018I"vbd;Tf 0.252002I"vbg;Tf0.00257527I"vbn;Tf0.0024348I"vbp;Tf 0.176242I"vbz;Tf 0.21843I"wdt;Tf0.000280938I"wp;Tf0.000655523I"wps;Tf4.68231e-5I"wrb;Tf0.000983284"ppc{,I"cc;Tf0.0919462I"cd;Tf0.0208826I"det;Tf 0.13353I"ex;Tf0.00257932I"fw;Tf0.000314148I"in;Tf0.0867545I"jj;Tf0.0418148I"jjr;Tf0.00165341I"jjs;Tf0.000975513I"lrb;Tf0.000314148I"ls;Tf4.96024e-5I"md;Tf0.0101519I"nn;Tf0.0487426I"nnp;Tf 0.127924I" 
nnps;Tf0.00105818I"nns;Tf0.0260578I"pdt;Tf0.000214944I"ppd;Tf0.00221557I"ppl;Tf0.0133265I"ppr;Tf0.0579356I"pps;Tf8.26706e-5I"prp;Tf0.0412692I" prps;Tf0.00410046I"rb;Tf0.0546122I"rbr;Tf0.000777104I"rbs;Tf0.000363751I"rp;Tf8.26706e-5I"to;Tf0.00945752I"uh;Tf0.000396819I"vb;Tf0.00376978I"vbd;Tf0.0529588I"vbg;Tf 0.044113I"vbn;Tf0.0206842I"vbp;Tf0.00866388I"vbz;Tf 0.031481I"wdt;Tf0.0360278I"wp;Tf0.0122848I"wps;Tf0.00219904I"wrb;Tf0.00823399" nnps{(I"cc;Tf0.0787172I"cd;Tf0.000971817I"det;Tf0.00323939I"ex;Tf0.000647878I"in;Tf0.0686751I"jj;Tf0.00615484I"jjr;Tf0.000323939I"lrb;Tf0.00291545I"md;Tf0.0233236I"nn;Tf0.0379009I"nnp;Tf 0.284742I" nnps;Tf0.0145773I"nns;Tf0.0119857I"pos;Tf0.0255912I"pp;Tf0.0767736I"ppc;Tf 0.129252I"ppl;Tf0.00129576I"ppr;Tf0.00323939I"pps;Tf0.0207321I"prp;Tf0.00129576I" prps;Tf0.000323939I"rb;Tf0.0136054I"rbr;Tf0.000323939I"rrb;Tf0.00226757I"sym;Tf0.000323939I"to;Tf0.0132815I"vb;Tf0.000971817I"vbd;Tf 0.080013I"vbg;Tf0.00356333I"vbn;Tf0.00259151I"vbp;Tf0.0553936I"vbz;Tf0.0259151I"wdt;Tf0.000323939I"wp;Tf0.00842242I"wps;Tf0.000323939"wps{I"cd;Tf0.0182648I"jj;Tf 0.182648I"jjr;Tf0.00456621I"jjs;Tf0.00913242I"nn;Tf 0.351598I"nnp;Tf0.0593607I"nns;Tf 0.328767I"ppd;Tf0.0182648I"ppl;Tf0.00456621I"rb;Tf0.00456621I"vbg;Tf0.0136986I"vbn;Tf0.00456621"vbp{.I"cc;Tf0.00429212I"cd;Tf0.00897444I"det;Tf 0.119594I"ex;Tf0.0018209I"fw;Tf6.50322e-5I"in;Tf0.0926058I"jj;Tf0.0856474I"jjr;Tf0.00851922I"jjs;Tf0.00058529I"lrb;Tf0.000650322I"md;Tf0.00169084I"nn;Tf0.0299798I"nnp;Tf0.0186642I" nnps;Tf0.000325161I"nns;Tf0.0325161I"pdt;Tf0.000780386I"pos;Tf0.000130064I"pp;Tf0.0194446I"ppc;Tf0.0195747I"ppd;Tf0.00266632I"ppl;Tf0.00487741I"ppr;Tf0.000455225I"pps;Tf0.00273135I"prp;Tf0.0357027I" prps;Tf0.0108604I"rb;Tf 0.164076I"rbr;Tf0.00526761I"rbs;Tf0.000845418I"rp;Tf0.00903947I"rrb;Tf0.000325161I"to;Tf0.0516356I"vb;Tf0.00279638I"vbd;Tf0.0036418I"vbg;Tf 0.084867I"vbn;Tf 
0.165702I"vbp;Tf0.00117058I"vbz;Tf0.00260129I"wdt;Tf0.000260129I"wp;Tf0.00234116I"wps;Tf6.50322e-5I"wrb;Tf0.00221109"ppd{I"cd;Tf 0.990264I"jj;Tf0.00973559"vbg{.I"cc;Tf0.00981521I"cd;Tf0.0179306I"det;Tf 0.184789I"ex;Tf0.000219334I"in;Tf 0.139826I"jj;Tf0.0721062I"jjr;Tf0.00937654I"jjs;Tf0.000658003I"lrb;Tf0.000383835I"md;Tf0.000493502I"nn;Tf 0.12694I"nnp;Tf0.0395898I" nnps;Tf0.000658003I"nns;Tf0.0894336I"pdt;Tf0.00109667I"pos;Tf5.48336e-5I"pp;Tf0.0169984I"ppc;Tf0.0129407I"ppd;Tf0.00690903I"ppl;Tf0.00509952I"ppr;Tf0.00208368I"pps;Tf0.00197401I"prp;Tf0.0244558I" prps;Tf0.0302681I"rb;Tf0.0384932I"rbr;Tf0.00438669I"rbs;Tf0.000109667I"rp;Tf0.0269781I"rrb;Tf0.000438669I"sym;Tf5.48336e-5I"to;Tf0.0953008I"uh;Tf0.000164501I"vb;Tf0.000877337I"vbd;Tf0.00307068I"vbg;Tf0.00433185I"vbn;Tf0.0253879I"vbp;Tf0.000548336I"vbz;Tf0.00202884I"wdt;Tf0.000164501I"wp;Tf0.00202884I"wrb;Tf0.00153534"to{*I"cc;Tf0.000473692I"cd;Tf0.0787422I"det;Tf 0.112593I"fw;Tf7.28757e-5I"in;Tf0.00393529I"jj;Tf0.0312272I"jjr;Tf0.0029879I"jjs;Tf0.000255065I"lrb;Tf0.000255065I"nn;Tf0.0318102I"nnp;Tf0.0450736I" nnps;Tf0.000364378I"nns;Tf0.0237939I"pdt;Tf0.000291503I"pp;Tf0.000692319I"ppc;Tf0.000728757I"ppd;Tf0.0441991I"ppl;Tf0.00389885I"ppr;Tf0.000109314I"pps;Tf7.28757e-5I"prp;Tf0.00513774I" prps;Tf0.0133727I"rb;Tf0.00932809I"rbr;Tf0.000947384I"rbs;Tf3.64378e-5I"rrb;Tf3.64378e-5I"to;Tf7.28757e-5I"uh;Tf3.64378e-5I"vb;Tf 0.578706I"vbd;Tf0.000109314I"vbg;Tf0.00688675I"vbn;Tf0.00142108I"vbp;Tf3.64378e-5I"vbz;Tf7.28757e-5I"wdt;Tf0.000728757I"wp;Tf0.00102026I"wrb;Tf0.000473692"rb{.I"cc;Tf0.00916326I"cd;Tf0.0408682I"det;Tf0.0473348I"ex;Tf0.000811603I"fw;Tf5.23615e-5I"in;Tf 0.127893I"jj;Tf 0.102079I"jjr;Tf0.0129071I"jjs;Tf0.000628338I"lrb;Tf0.000575977I"md;Tf0.0102367I"nn;Tf0.0117552I"nnp;Tf0.00685936I" nnps;Tf0.000261808I"nns;Tf0.00463399I"pdt;Tf0.000654519I"pos;Tf0.000183265I"pp;Tf0.0496125I"ppc;Tf0.0972091I"ppd;Tf0.0123311I"ppl;Tf0.00170175I"ppr;Tf0.0014923I"pps;Tf 0.004346I"prp;Tf0.00877055I" 
prps;Tf0.00235627I"rb;Tf0.0726254I"rbr;Tf0.0077495I"rbs;Tf0.000209446I"rp;Tf0.00034035I"rrb;Tf0.000445073I"sym;Tf2.61808e-5I"to;Tf 0.02694I"vb;Tf 2 | 0.102I"vbd;Tf0.0548749I"vbg;Tf0.0303435I"vbn;Tf 0.081108I"vbp;Tf0.0255262I"vbz;Tf0.0383024I"wdt;Tf0.000261808I"wp;Tf0.00120431I"wrb;Tf0.00332496"nnp{/I"cc;Tf0.0419473I"cd;Tf 0.01911I"det;Tf0.0026398I"ex;Tf1.75402e-5I"fw;Tf0.000429734I"in;Tf0.0407721I"jj;Tf0.00841928I"jjr;Tf8.77008e-5I"jjs;Tf8.77008e-6I"lrb;Tf0.0034291I"md;Tf0.0110152I"nn;Tf0.0586719I"nnp;Tf 0.377991I" nnps;Tf0.0156283I"nns;Tf0.0241967I"pdt;Tf8.77008e-6I"pos;Tf0.0558742I"pp;Tf0.0544184I"ppc;Tf 0.140391I"ppd;Tf0.000236792I"ppl;Tf0.00104364I"ppr;Tf0.00256086I"pps;Tf0.00705115I"prp;Tf0.000868238I" prps;Tf9.64709e-5I"rb;Tf0.00898934I"rbr;Tf0.000315723I"rbs;Tf8.77008e-6I"rp;Tf5.26205e-5I"rrb;Tf0.00352557I"sym;Tf3.50803e-5I"to;Tf0.00755981I"vb;Tf0.000964709I"vbd;Tf0.0653722I"vbg;Tf0.00169263I"vbn;Tf0.000815618I"vbp;Tf0.00399916I"vbz;Tf0.0376938I"wdt;Tf0.000938399I"wp;Tf0.000578826I"wps;Tf8.77008e-6I"wrb;Tf0.000534975"in{1I"cc;Tf0.00128828I"cd;Tf0.0603768I"det;Tf 0.328199I"ex;Tf0.00150162I"fw;Tf0.000180523I"in;Tf0.0200791I"jj;Tf0.0909427I"jjr;Tf0.00493977I"jjs;Tf0.0046936I"lrb;Tf0.000295402I"ls;Tf8.2056e-6I"md;Tf0.000123084I"nn;Tf 0.108191I"nnp;Tf 0.150409I" nnps;Tf0.00177241I"nns;Tf0.0591706I"pdt;Tf0.00137854I"pos;Tf3.28224e-5I"pp;Tf0.00195293I"ppc;Tf0.00242886I"ppd;Tf0.0276693I"ppl;Tf0.00563725I"ppr;Tf0.000155906I"pps;Tf0.000229757I"prp;Tf0.0301884I" prps;Tf 0.03576I"rb;Tf0.0142449I"rbr;Tf0.000722093I"rbs;Tf0.000180523I"rp;Tf1.64112e-5I"rrb;Tf7.38504e-5I"sym;Tf1.64112e-5I"to;Tf0.00224833I"uh;Tf8.2056e-6I"vb;Tf0.000459514I"vbd;Tf0.000672859I"vbg;Tf0.0307956I"vbn;Tf0.00466078I"vbp;Tf0.000246168I"vbz;Tf0.000582598I"wdt;Tf0.00357764I"wp;Tf0.00224833I"wps;Tf3.28224e-5I"wrb;Tf0.0016083"lrb{$I"cc;Tf0.0269139I"cd;Tf 
0.034689I"det;Tf0.0717703I"ex;Tf0.00119617I"fw;Tf0.00299043I"in;Tf0.0843301I"jj;Tf0.0466507I"jjr;Tf0.00179426I"ls;Tf0.00119617I"md;Tf0.00119617I"nn;Tf0.0633971I"nnp;Tf 0.330742I" nnps;Tf0.00179426I"nns;Tf0.0161483I"pdt;Tf0.000598086I"ppd;Tf 0.166866I"ppl;Tf0.0263158I"prp;Tf0.0197368I" prps;Tf0.0041866I"rb;Tf0.0305024I"to;Tf0.0041866I"uh;Tf0.00358852I"vb;Tf0.0131579I"vbd;Tf0.00119617I"vbg;Tf0.00777512I"vbn;Tf 0.020933I"vbp;Tf0.000598086I"vbz;Tf0.000598086I"wdt;Tf0.00717703I"wp;Tf0.00538278I"wrb;Tf0.00239234"fw{I"cc;Tf0.0149254I"det;Tf0.00746269I"fw;Tf 0.246269I"in;Tf0.0261194I"jj;Tf0.0261194I"lrb;Tf0.0149254I"md;Tf0.00746269I"nn;Tf0.0708955I"nnp;Tf 0.201493I"nns;Tf0.0298507I"pos;Tf0.00746269I"pp;Tf 0.108209I"ppc;Tf0.0970149I"ppd;Tf0.00746269I"ppl;Tf0.00373134I"ppr;Tf0.0559702I"pps;Tf 0.011194I"rrb;Tf0.00746269I"to;Tf0.0149254I"vbn;Tf0.00373134I"vbp;Tf0.00373134I"vbz;Tf0.0335821"wrb{(I"cc;Tf0.00342857I"cd;Tf0.00647619I"det;Tf 0.252952I"ex;Tf0.00952381I"in;Tf0.00952381I"jj;Tf0.0963809I"jjr;Tf0.00114286I"jjs;Tf0.00152381I"lrb;Tf0.000380952I"md;Tf0.0118095I"nn;Tf0.0609524I"nnp;Tf0.0998095I" nnps;Tf0.00152381I"nns;Tf0.0788571I"pdt;Tf0.00228571I"pp;Tf0.00304762I"ppc;Tf0.0110476I"ppd;Tf0.000380952I"ppl;Tf0.00266667I"prp;Tf 0.212571I" prps;Tf 0.023619I"rb;Tf0.0388571I"rbs;Tf0.000380952I"rp;Tf0.000380952I"rrb;Tf0.000380952I"to;Tf 0.023619I"vb;Tf0.00228571I"vbd;Tf0.00685714I"vbg;Tf0.00990476I"vbn;Tf0.0102857I"vbp;Tf0.00609524I"vbz;Tf0.00990476I"wdt;Tf0.000380952I"wp;Tf0.000380952I"wrb;Tf0.000380952"wdt{"I"cc;Tf0.000375728I"cd;Tf0.0046966I"det;Tf0.0272403I"ex;Tf0.00169078I"in;Tf0.00901747I"jj;Tf0.00920534I"jjr;Tf0.000751456I"jjs;Tf0.000751456I"lrb;Tf0.000187864I"md;Tf 0.134135I"nn;Tf0.0146534I"nnp;Tf0.0212286I" nnps;Tf0.000187864I"nns;Tf0.0174714I"pos;Tf0.000187864I"ppc;Tf0.00544806I"ppd;Tf0.00093932I"ppl;Tf0.000751456I"pps;Tf0.000375728I"prp;Tf0.0338155I" prps;Tf0.00169078I"rb;Tf0.0479053I"rbr;Tf0.000187864I"to;Tf0.00150291I"vb;Tf0.00112718I"vbd;Tf 
0.229946I"vbn;Tf0.00093932I"vbp;Tf 0.145595I"vbz;Tf 0.287995"sym{I"cd;Tf0.0614286I"fw;Tf0.0185714I"in;Tf0.0185714I"jj;Tf0.0471429I"nn;Tf0.0471429I"nnp;Tf0.0471429I"pps;Tf 0.604286I"rb;Tf0.0185714I"sym;Tf0.1I"vbn;Tf0.0042857I"vbz;Tf0.0328571"ppr{.I"cc;Tf0.0600601I"cd;Tf 0.00462I"det;Tf0.0990991I"ex;Tf0.0017325I"fw;Tf 0.000231I"in;Tf 0.131786I"jj;Tf 0.019635I"jjr;Tf 0.000462I"jjs;Tf0.000693001I"lrb;Tf0.00820051I"md;Tf0.0047355I"nn;Tf0.0446985I"nnp;Tf 0.135251I" nnps;Tf0.0003465I"nns;Tf0.0294525I"pdt;Tf0.0003465I"pp;Tf0.0021945I"ppc;Tf0.000808501I"ppd;Tf0.0001155I"ppl;Tf 0.011319I"ppr;Tf0.00646801I"pps;Tf0.00993301I"prp;Tf 0.10857I" prps;Tf 0.004389I"rb;Tf 0.033495I"rbr;Tf0.0003465I"rbs;Tf 0.000231I"rrb;Tf0.0042735I"sym;Tf0.0001155I"to;Tf0.0167475I"uh;Tf 0.000231I"vb;Tf0.0026565I"vbd;Tf0.0937861I"vbg;Tf0.0125895I"vbn;Tf0.00704551I"vbp;Tf0.0033495I"vbz;Tf 0.122661I"wdt;Tf0.00924001I"wp;Tf0.0026565I"wps;Tf0.000577501I"wrb;Tf0.00485101"ls{ I"jj;Tf 0.03125I"nn;Tf 0.015625I"pp;Tf 0.40625I"ppc;Tf 3 | 0.125I"pps;Tf 0.109375I"rrb;Tf 0.3125"jjr{$I"cc;Tf0.0207869I"cd;Tf0.0027221I"det;Tf0.00668151I"in;Tf 0.320713I"jj;Tf0.0487503I"lrb;Tf0.000247463I"md;Tf0.000989854I"nn;Tf 0.268003I"nnp;Tf0.0173224I" nnps;Tf0.000494927I"nns;Tf 0.169018I"pdt;Tf0.000247463I"pp;Tf0.0514724I"ppc;Tf0.0400891I"ppd;Tf0.000989854I"ppl;Tf0.00123732I"ppr;Tf0.00173224I"pps;Tf0.00445434I"prp;Tf0.00173224I" prps;Tf0.000742391I"rb;Tf0.0054442I"rp;Tf0.00148478I"rrb;Tf0.000494927I"to;Tf 0.023509I"vb;Tf0.000247463I"vbd;Tf0.0027221I"vbg;Tf0.00173224I"vbn;Tf0.00247463I"vbp;Tf0.00123732I"vbz;Tf0.00148478I"wrb;Tf0.000742391"uh{I"in;Tf 0.017094I"nn;Tf 0.017094I"nns;Tf0.00854701I"pp;Tf 0.196581I"ppc;Tf 0.529915I"ppr;Tf0.0512821I"pps;Tf 0.034188I"prp;Tf0.00854701I" prps;Tf0.00854701I"rrb;Tf 0.025641I"to;Tf0.0512821I"uh;Tf 0.034188I"vb;Tf 0.017094"rp{&I"cc;Tf0.0116031I"cd;Tf 0.020458I"det;Tf 0.211298I"in;Tf 0.246107I"jj;Tf 
0.050687I"jjr;Tf0.00580153I"jjs;Tf0.00244275I"lrb;Tf0.000610687I"md;Tf0.000305344I"nn;Tf0.0476336I"nnp;Tf0.0256489I"nns;Tf0.0500763I"pdt;Tf0.000916031I"pp;Tf0.0622901I"ppc;Tf0.0445802I"ppd;Tf0.00885496I"ppl;Tf0.00580153I"ppr;Tf0.00610687I"pps;Tf0.00366412I"prp;Tf0.000916031I" prps;Tf0.0525191I"rb;Tf0.0522137I"rbr;Tf0.00396947I"rp;Tf0.000610687I"rrb;Tf0.000305344I"to;Tf0.0531298I"vbd;Tf0.00122137I"vbg;Tf0.0180153I"vbn;Tf0.00183206I"vbp;Tf0.000916031I"vbz;Tf0.000610687I"wp;Tf0.00458015I"wrb;Tf0.00427481"pps{.I"cc;Tf0.0653852I"cd;Tf 0.123542I"det;Tf 0.117299I"ex;Tf0.00180713I"fw;Tf0.000328569I"in;Tf0.0791852I"jj;Tf0.0395926I"jjr;Tf0.00312141I"jjs;Tf0.000985707I"lrb;Tf0.000492854I"ls;Tf0.00279284I"md;Tf0.00903565I"nn;Tf 0.042714I"nnp;Tf 0.120092I" nnps;Tf0.00114999I"nns;Tf 0.034664I"pdt;Tf0.000492854I"pp;Tf0.0180713I"ppc;Tf0.000821423I"ppd;Tf0.0300641I"ppl;Tf0.0668638I"ppr;Tf0.00131428I"pps;Tf0.00443568I"prp;Tf0.0423854I" prps;Tf0.00706424I"rb;Tf0.0542139I"rbr;Tf0.000657138I"rbs;Tf0.000657138I"sym;Tf0.000492854I"to;Tf0.0110071I"uh;Tf0.000657138I"vb;Tf0.0139642I"vbd;Tf0.0185642I"vbg;Tf 0.021357I"vbn;Tf0.0137999I"vbp;Tf0.0119928I"vbz;Tf0.0159356I"wdt;Tf0.0108428I"wp;Tf0.00591424I"wps;Tf0.000164285I"wrb;Tf0.00607853"nns{.I"cc;Tf0.0593685I"cd;Tf0.00163635I"det;Tf0.0170803I"ex;Tf8.11414e-5I"in;Tf 0.23504I"jj;Tf0.0166746I"jjr;Tf0.00119007I"jjs;Tf6.76178e-5I"lrb;Tf0.00425992I"md;Tf0.0277233I"nn;Tf0.0211373I"nnp;Tf0.00301575I" nnps;Tf2.70471e-5I"nns;Tf0.0107783I"pdt;Tf5.40943e-5I"pos;Tf0.00922307I"pp;Tf 0.134952I"ppc;Tf 0.124079I"ppd;Tf0.000283995I"ppl;Tf0.00232605I"ppr;Tf0.00405707I"pps;Tf0.0189465I"prp;Tf0.00462506I" prps;Tf0.000229901I"rb;Tf0.0311177I"rbr;Tf0.00192035I"rbs;Tf0.000121712I"rp;Tf0.000689702I"rrb;Tf0.00167692I"sym;Tf5.40943e-5I"to;Tf0.0396105I"vb;Tf0.00397593I"vbd;Tf0.0745284I"vbg;Tf0.0141051I"vbn;Tf0.0207451I"vbp;Tf0.0845223I"vbz;Tf0.00806004I"wdt;Tf0.0124011I"wp;Tf0.00695111I"wps;Tf0.000446278I"wrb;Tf0.00221786"jjs{#I"cc;Tf0.00835422I"cd;Tf 
0.06934I"det;Tf0.0183793I"ex;Tf0.00125313I"in;Tf 0.161654I"jj;Tf 0.103592I"lrb;Tf0.000417711I"md;Tf0.000835422I"nn;Tf 0.351713I"nnp;Tf0.0108605I" nnps;Tf0.00167084I"nns;Tf 0.155388I"pdt;Tf0.00125313I"pp;Tf0.0121136I"ppc;Tf0.0263158I"ppd;Tf0.0108605I"ppl;Tf0.00125313I"ppr;Tf0.00167084I"pps;Tf0.00459482I"prp;Tf0.00459482I" prps;Tf0.000417711I"rb;Tf 0.015873I"to;Tf0.0037594I"vb;Tf0.00292398I"vbd;Tf0.00334169I"vbg;Tf0.00584795I"vbn;Tf0.00501253I"vbp;Tf0.0121136I"vbz;Tf0.00292398I"wrb;Tf0.00167084"ex{I"det;Tf0.00185701I"md;Tf0.0807799I"pos;Tf0.0120706I"ppc;Tf0.00371402I"prp;Tf0.000928505I"rb;Tf0.0222841I"to;Tf0.000928505I"vb;Tf0.00185701I"vbd;Tf 0.182916I"vbp;Tf 0.224698I"vbz;Tf 0.46611I"wp;Tf0.00185701"det{-I"cc;Tf0.000711631I"cd;Tf0.0235332I"det;Tf0.00166047I"fw;Tf0.000227327I"in;Tf0.00962679I"jj;Tf 0.216346I"jjr;Tf0.00550526I"jjs;Tf0.00935004I"lrb;Tf0.000474421I"md;Tf0.00213489I"nn;Tf 0.473709I"nnp;Tf 0.113584I" nnps;Tf0.00454653I"nns;Tf0.0734265I"pdt;Tf9.88377e-6I"pos;Tf2.96513e-5I"pp;Tf0.00157152I"ppc;Tf0.0022535I"ppd;Tf0.0090733I"ppl;Tf0.00576224I"ppr;Tf4.94188e-5I"pps;Tf0.000355816I"prp;Tf0.000484305I" prps;Tf0.000642445I"rb;Tf 0.0102I"rbr;Tf0.00179885I"rbs;Tf0.00280699I"rp;Tf6.91864e-5I"rrb;Tf3.95351e-5I"to;Tf0.000286629I"uh;Tf1.97675e-5I"vb;Tf0.00023721I"vbd;Tf0.00230292I"vbg;Tf0.00810469I"vbn;Tf0.00839132I"vbp;Tf0.00171978I"vbz;Tf0.00792678I"wdt;Tf0.000217443I"wp;Tf0.000800585I"wrb;Tf9.88377e-6" prps{I"cc;Tf0.000488234I"cd;Tf0.0210917I"fw;Tf0.00029294I"in;Tf9.76467e-5I"jj;Tf 0.240992I"jjr;Tf0.00273411I"jjs;Tf0.0106435I"lrb;Tf0.00087882I"nn;Tf 0.441656I"nnp;Tf 0.048921I" nnps;Tf0.00058588I"nns;Tf 0.195293I"pp;Tf9.76467e-5I"ppc;Tf0.00029294I"ppd;Tf0.00790938I"ppl;Tf0.00478469I"pps;Tf9.76467e-5I"rb;Tf0.00546822I"rbr;Tf0.000195293I"rbs;Tf0.00205058I"rrb;Tf0.00029294I"vbd;Tf0.00029294I"vbg;Tf0.0075188I"vbn;Tf0.00712821I"vbz;Tf0.000195293"pos{(I"cc;Tf0.00648268I"cd;Tf0.0250046I"det;Tf0.000370439I"fw;Tf0.000185219I"in;Tf0.00277829I"jj;Tf 
0.207261I"jjr;Tf0.00231524I"jjs;Tf0.0253751I"lrb;Tf0.000277829I"md;Tf0.000926097I"nn;Tf 0.417114I"nnp;Tf 0.107242I" nnps;Tf0.00361178I"nns;Tf 0.128264I"pp;Tf0.00509354I"ppc;Tf0.00592702I"ppd;Tf0.0095388I"ppl;Tf0.0114836I"ppr;Tf0.000370439I"pps;Tf0.000185219I"prp;Tf9.26097e-5I" prps;Tf0.000277829I"rb;Tf0.00555658I"rbr;Tf0.000370439I"rbs;Tf0.00240785I"rrb;Tf0.000926097I"to;Tf0.000185219I"vb;Tf0.000370439I"vbd;Tf0.00601963I"vbg;Tf0.0115762I"vbn;Tf0.00768661I"vbp;Tf0.000463049I"vbz;Tf0.00388961I"wp;Tf0.000185219I"wrb;Tf0.000185219 --------------------------------------------------------------------------------