├── .gitignore ├── CMakeLists.txt ├── MANIFEST.in ├── README.md ├── etc ├── pfp │ └── tokenizer.flex └── test │ └── sample_input ├── include ├── moost │ ├── http.hpp │ └── http │ │ ├── connection.hpp │ │ ├── header.hpp │ │ ├── mime_types.hpp │ │ ├── reply.hpp │ │ ├── request.hpp │ │ ├── request_handler_base.hpp │ │ ├── request_parser.hpp │ │ └── server.hpp ├── pfp │ ├── binary_grammar.hpp │ ├── config.h │ ├── lexicon.hpp │ ├── pcfg_parser.hpp │ ├── state_list.hpp │ ├── tokenizer.h │ ├── unary_grammar.hpp │ └── util.hpp ├── pfpd │ ├── pfpd_handler.h │ └── resource_stack.hpp └── pypfp │ └── pypfp.h ├── setup.py ├── share └── pfp │ ├── americanizations │ ├── binary_rules │ ├── sig_state │ ├── sigs │ ├── states │ ├── unary_rules │ ├── word_state │ └── words └── src ├── moost └── http │ ├── mime_types.cpp │ ├── reply.cpp │ └── request_parser.cpp ├── pfp ├── config.cpp └── tokenizer.yy.cpp ├── pfpc ├── main.cpp └── pfpc_token.cpp ├── pfpd ├── main.cpp └── pfpd_handler.cpp ├── pypfp └── pypfp.cpp └── test ├── lexicon.cpp ├── main.cpp ├── pcfg_parser.cpp ├── pfp.cpp └── tokenizer.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore .pyc files 2 | *.pyc 3 | # ignore eclipse stuff 4 | .project 5 | .cproject 6 | .pydevproject 7 | # ignore setuptools egg-info stuff 8 | *.egg-info 9 | build 10 | dist 11 | # ignore project-local env dirs 12 | env 13 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(pfp) 2 | 3 | CMAKE_MINIMUM_REQUIRED(VERSION 2.4) 4 | 5 | if(COMMAND cmake_policy) 6 | cmake_policy(SET CMP0003 NEW) 7 | endif(COMMAND cmake_policy) 8 | 9 | SET(CMAKE_VERBOSE_MAKEFILE ON) 10 | 11 | # binaries get installed here 12 | SET(CMAKE_INSTALL_PREFIX "/usr/") 13 | 14 | # add definitions, compiler switches, etc. 
15 | 16 | IF(APPLE) 17 | ADD_DEFINITIONS(-Wall -O3 -DNDEBUG) 18 | ELSE(APPLE) 19 | ADD_DEFINITIONS(-Wall -O3 -DNDEBUG -march=native -mtune=native `getconf LFS_CFLAGS`) 20 | ENDIF(APPLE) 21 | 22 | INCLUDE_DIRECTORIES(include /usr/include/python2.6) 23 | 24 | ADD_EXECUTABLE(test 25 | src/test/lexicon.cpp 26 | src/test/pcfg_parser.cpp 27 | src/test/tokenizer.cpp 28 | src/test/pfp.cpp 29 | src/test/main.cpp 30 | ) 31 | 32 | ADD_EXECUTABLE(pfpc 33 | src/pfpc/main.cpp 34 | ) 35 | 36 | ADD_EXECUTABLE(pfpc_token 37 | src/pfpc/pfpc_token.cpp 38 | ) 39 | 40 | ADD_LIBRARY(pfp SHARED 41 | src/pfp/config 42 | src/pfp/tokenizer.yy 43 | ) 44 | 45 | ADD_EXECUTABLE(pfpd 46 | src/pfpd/main 47 | src/pfpd/pfpd_handler 48 | src/moost/http/mime_types 49 | src/moost/http/reply 50 | src/moost/http/request_parser 51 | ) 52 | 53 | IF(APPLE) 54 | TARGET_LINK_LIBRARIES(pfpd pfp boost_filesystem-mt boost_thread-mt boost_regex-mt boost_system-mt icuio) 55 | TARGET_LINK_LIBRARIES(pfpc pfp boost_filesystem-mt boost_thread-mt boost_regex-mt boost_system-mt icuio) 56 | TARGET_LINK_LIBRARIES(pfpc_token pfp boost_filesystem-mt boost_thread-mt boost_regex-mt boost_system-mt icuio) 57 | TARGET_LINK_LIBRARIES(test pfp boost_thread-mt boost_unit_test_framework-mt boost_regex-mt icuio) 58 | TARGET_LINK_LIBRARIES(pfp boost_filesystem-mt boost_thread-mt boost_regex-mt boost_system-mt icuio icuuc) 59 | ELSE(APPLE) 60 | TARGET_LINK_LIBRARIES(pfpd pfp boost_filesystem boost_thread boost_regex boost_system icuio icuuc) 61 | TARGET_LINK_LIBRARIES(pfpc pfp boost_filesystem boost_thread boost_regex boost_system icuio icuuc) 62 | TARGET_LINK_LIBRARIES(pfpc_token pfp boost_filesystem boost_thread boost_regex boost_system icuio icuuc) 63 | TARGET_LINK_LIBRARIES(test pfp boost_thread boost_unit_test_framework boost_regex icuio icuuc) 64 | ENDIF(APPLE) 65 | 66 | INSTALL(TARGETS pfpd DESTINATION bin) 67 | INSTALL(TARGETS pfpc DESTINATION bin) 68 | INSTALL(TARGETS pfpc_token DESTINATION bin) 69 | 70 | INSTALL(TARGETS pfp 
LIBRARY DESTINATION lib) 71 | INSTALL(DIRECTORY share/pfp DESTINATION share) 72 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include include/pfp * 2 | recursive-include include/pypfp * 3 | recursive-include share/pfp * 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pfp -- pretty fast statistical parser for natural languages 2 | =========================================================== 3 | 4 | pfp is a **pretty fast statistical parser** for probabilistic context free grammars. pfp uses the exhaustive [CYK](http://en.wikipedia.org/wiki/CYK_algorithm) algorithm found in [the Stanford NLP parser](http://nlp.stanford.edu/software/lex-parser.shtml) (and gratefully uses its trained grammar), but boasts the following improvements: 5 | 6 | * 3-4x faster than the Stanford parser 7 | * Uses 5-8x less resident memory 8 | * Thread-safe/multi-core 9 | * Python bindings 10 | * Lightweight threadpool http server 11 | * Command-line client 12 | 13 | ## Installing 14 | 15 | The following works on **Ubuntu 10.04 LTS**: 16 | 17 | sudo apt-get install -y git-core cmake build-essential libboost-all-dev python-dev 18 | git clone http://github.com/wavii/pfp.git && cd pfp 19 | cmake . 20 | make 21 | ./test && sudo make install 22 | 23 | To install the python library: 24 | 25 | sudo python setup.py install 26 | 27 | If you are running OS X using [Homebrew](http://mxcl.github.com/homebrew/) you may need to first get your dependencies in order: 28 | 29 | brew install icu4c boost boost-jam 30 | brew link icu4c --force 31 | 32 | ## Benchmarks 33 | 34 | Parse times and maximum memory usage were recorded on a c1.medium Amazon EC2 instance. pfp is compared to Stanford Parser version **1.6.3** running on **sun-java6**. 
35 | 36 | Sentence Length Stanford (avg ms) pfp (avg ms) Stanford (res mb) pfp (res mb) 37 | 0-9 21.0407725322 5.12853470437 83 10 38 | 10-19 104.675603217 34.9076086957 83 14 39 | 20-29 363.596421471 124.78 105 21 40 | 30-39 830.084942085 320.664122137 150 30 41 | 40-45 1607.43820225 542.533333333 150 35 42 | 43 | ## Usage 44 | 45 | **pfpc** is a command-line client that reads from `stdin` and writes parses to `stdout`: 46 | 47 | $ echo "I love monkeys." | pfpc 2>/dev/null 48 | (ROOT (S (NP (PRP I)) (VP (VBP love) (NP (NNS monkeys))) (. .)) ) 49 | 50 | **pfpd** is a threadpool web server that wraps pfp: 51 | 52 | $ pfpd localhost 8080 2>/dev/null & 53 | [1] 3600 54 | $ curl http://localhost:8080/parse/I+love+monkeys. 55 | (ROOT (S (NP (PRP I)) (VP (VBP love) (NP (NNS monkeys))) (. .)) ) 56 | 57 | **pypfp** provides Python bindings for pfp: 58 | 59 | $ python 60 | >>> import pfp 61 | >>> pfp.Parser().parse("I love monkeys.") 62 | '(ROOT (S (NP (PRP I)) (VP (VBP love) (NP (NNS monkeys))) (. .)) )' 63 | 64 | ## License 65 | 66 | (GPL v2) 67 | 68 | Copyright (c) 2010 Wavii, Inc. 69 | 70 | This program is free software: you can redistribute it and/or modify 71 | it under the terms of the GNU General Public License as published by 72 | the Free Software Foundation, either version 2 of the License, or 73 | (at your option) any later version. 74 | 75 | This program is distributed in the hope that it will be useful, 76 | but WITHOUT ANY WARRANTY; without even the implied warranty of 77 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 78 | GNU General Public License for more details. 79 | 80 | You should have received a copy of the GNU General Public License 81 | along with this program. If not, see <http://www.gnu.org/licenses/>. 
82 | -------------------------------------------------------------------------------- /etc/pfp/tokenizer.flex: -------------------------------------------------------------------------------- 1 | %option reentrant noyywrap case-insensitive 2 | %option extra-type="com::wavii::pfp::tokenizer::tokenizer_out *" 3 | %{ 4 | 5 | #include 6 | 7 | %} 8 | 9 | SGML <\/?[A-Za-z!][^>]*> 10 | SPMDASH &(MD|mdash);|\x96|\x97|\xe2\x80\x93|\xe2\x80\x94 11 | SPAMP & 12 | SPPUNC &(HT|TL|UR|LR|QC|QL|QR|odq|cdq|lt|gt|#[0-9]+); 13 | SPLET &[aeiouAEIOU](acute|grave|uml); 14 | SPACE [ \t]+ 15 | SPACENL [ \t\r\n]+ 16 | SENTEND [ \t\n][ \t\n]+|[ \t\n]+([A-Z]|{SGML}) 17 | DIGIT [0-9] 18 | DATE {DIGIT}{1,2}[\-\/]{DIGIT}{1,2}[\-\/]{DIGIT}{2,4} 19 | NUM {DIGIT}+|{DIGIT}*([.:,]{DIGIT}+)+ 20 | NUMBER [\-+]?{NUM}|\({NUM}\) 21 | /* Constrain fraction to only match likely fractions */ 22 | FRAC ({DIGIT}{1,4}[- ])?{DIGIT}{1,4}\\?\/{DIGIT}{1,4} 23 | FRAC2 \xc2\xbc|\xc2\xbd|\xc2\xbe 24 | DOLSIGN ([A-Z]*\$|#) 25 | DOLSIGN2 \xc2\xa2|\xc2\xa3|\xc2\x80|\xe2\x82\xac 26 | /* not used DOLLAR {DOLSIGN}[ \t]*{NUMBER} */ 27 | /* |\( ?{NUMBER} ?\)) # is for pound signs */ 28 | WORD ([A-Za-z]|\xc3[\x80-\xbf]|{SPLET})+ 29 | /* The $ was for things like New$ */ 30 | /* WAS: only keep hyphens with short one side like co-ed */ 31 | /* But treebank just allows hyphenated things as words! */ 32 | THING [A-Za-z0-9]+([_-][A-Za-z0-9]+)* 33 | THINGA [A-Z]+(([+&]|{SPAMP})[A-Z]+)+ 34 | THING3 [A-Za-z0-9]+(-[A-Za-z]+){0,2}(\\?\/[A-Za-z0-9]+(-[A-Za-z]+){0,2}){1,2} 35 | APOS [']|\xc2\x92|\xe2\x80\x99|' 36 | HTHING ([A-Za-z0-9][A-Za-z0-9%.,]*(-([A-Za-z0-9]+|{ACRO}\.))+)|[dDOlL]{APOS}{THING} 37 | REDAUX {APOS}([msdMSD]|re|ve|ll) 38 | /* For things that will have n't on the end. They can't end in 'n' */ 39 | SWORD [A-Za-z]*[A-MO-Za-mo-z] 40 | SREDAUX n{APOS}t 41 | /* Tokens you want but already okay: C'mon 'n' '[2-9]0s '[eE]m 'till? 42 | [Yy]'all 'Cause Shi'ite B'Gosh o'clock. Here now only need apostrophe 43 | final words. 
*/ 44 | APOWORD {APOS}n{APOS}?|[lLdDjJ]'|Dunkin{APOS}|somethin{APOS}|ol{APOS}|{APOS}em|C{APOS}mon|{APOS}[2-9]0s|{APOS}till?|o{APOS}clock|[A-Za-z][a-z]*[aeiou]{APOS}[aeiou][a-z]*|{APOS}cause 45 | FULLURL https?:\/\/[^ \t\n\f\r\"<>|()]+[^ \t\n\f\r\"<>|.!?(){},-] 46 | LIKELYURL ((www\.([^ \t\n\f\r\"<>|.!?(){},]+\.)+[a-zA-Z]{2,4})|(([^ \t\n\f\r\"`'<>|.!?(){},-_$]+\.)+(com|net|org|edu)))(\/[^ \t\n\f\r\"<>|()]+[^ \t\n\f\r\"<>|.!?(){},-])? 47 | EMAIL [a-zA-Z0-9][^ \t\n\f\r\"<>|()]*@([^ \t\n\f\r\"<>|().]+\.)+[a-zA-Z]{2,4} 48 | 49 | /* Abbreviations - induced from 1987 WSJ by hand */ 50 | ABMONTH Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec 51 | /* Jun and Jul barely occur, but don't seem dangerous */ 52 | ABDAYS Mon|Tue|Tues|Wed|Thu|Thurs|Fri 53 | /* In caseless, |a\.m|p\.m handled as ACRO, and this is better as can often 54 | be followed by capitalized. */ 55 | /* Sat. and Sun. barely occur and can easily lead to errors, so we omit them */ 56 | ABSTATE Calif|Mass|Conn|Fla|Ill|Mich|Pa|Va|Ariz|Tenn|Mo|Md|Wis|Minn|Ind|Okla|Wash|Kan|Ore|Ga|Colo|Ky|Del|Ala|La|Nev|Neb|Ark|Miss|Vt|Wyo|Tex 57 | ACRO [A-Za-z](\.[A-Za-z])+|(Canada|Sino|Korean|EU|Japan|non)-U\.S|U\.S\.-(U\.K|U\.S\.S\.R) 58 | ABTITLE Mr|Mrs|Ms|Miss|Drs?|Profs?|Sens?|Reps?|Lt|Col|Gen|Messrs|Govs?|Adm|Rev|Maj|Sgt|Pvt|Mt|Capt|St|Ave|Pres 59 | ABPTIT Jr|Bros|Sr 60 | ABCOMP Inc|Cos?|Corp|Pty|Ltd|Plc|Bancorp|Dept|Mfg|Bhd|Assn 61 | ABNUM Nos?|Prop|Ph 62 | /* p used to be in ABNUM list, but it can't be any more, since the lexer 63 | is now caseless. We don't want to have it recognized for P. Both 64 | p. and P. are now under ABBREV4. ABLIST also went away as no-op [a-e] */ 65 | /* ABBREV1 abbreviations are normally followed by lower case words. If 66 | they're followed by an uppercase one, we assume there is also a 67 | sentence boundary */ 68 | ABBREV3 {ABMONTH}|{ABDAYS}|{ABSTATE}|{ABCOMP}|{ABNUM}|{ABPTIT}|etc|ft 69 | ABBREV1 {ABBREV3}\. 70 | 71 | /* ABRREV2 abbreviations are normally followed by an upper case word. 
We 72 | assume they aren't used sentence finally */ 73 | /* ACRO Is a bad case -- can go either way! */ 74 | ABBREV4 [A-Za-z]|{ABTITLE}|vs|Alex|Cie|a\.k\.a|TREAS|{ACRO} 75 | ABBREV2 {ABBREV4}\. 76 | /* Cie. is used before French companies */ 77 | /* in the WSJ Alex. is generally an abbreviation for Alex. Brown, brokers! */ 78 | /* In tables: Mkt. for market Div. for division of company, Chg., Yr.: year */ 79 | 80 | PHONE \([0-9]{3}\)\ ?[0-9]{3}[\- ][0-9]{4} 81 | OPBRAC [<\[] 82 | CLBRAC [>\]] 83 | HYPHENS \-+|(\xe8\x88\x92)+ 84 | LDOTS \.{3,5}|(\.\ ){2,4}\.|\xc2\x85|\xe2\x80\xa6 85 | ATS @+ 86 | UNDS _+ 87 | ASTS \*+|(\\\*){1,3} 88 | HASHES #+ 89 | FNMARKS {ATS}|{HASHES}|{UNDS} 90 | INSENTP [,;:] 91 | QUOTES `|{APOS}|``|''|(\xe2\x80\x98|\xe2\x80\x99|\xe2\x80\x9c|\xe2\x80\x9d|\xc2\x91|\xc2\x92|\xc2\x93|\xc2\x94){1,2} 92 | DBLQUOT \"|" 93 | TBSPEC -(RRB|LRB|RCB|LCB|RSB|LSB)-|C\.D\.s|D'Amico|M'Bow|pro-|anti-|S&P-500|Jos\.|cont'd\.?|B'Gosh|S&Ls|N'Ko|'twas 94 | TBSPEC2 {APOS}[0-9][0-9] 95 | 96 | %% 97 | 98 | {SGML} { yyextra->put(yytext); } 99 | {SPMDASH} { yyextra->put("--"); } 100 | {SPAMP} { yyextra->put("&"); } 101 | {SPPUNC} { yyextra->put(yytext); } 102 | {WORD}/{REDAUX} { yyextra->put(yytext); } 103 | {SWORD}/{SREDAUX} { yyextra->put(yytext); } 104 | {WORD} { yyextra->put_american(yytext); } 105 | {APOWORD} { yyextra->put(yytext); } 106 | {FULLURL} { yyextra->put(yytext); } 107 | {LIKELYURL} { yyextra->put(yytext); } 108 | {EMAIL} { yyextra->put(yytext); } 109 | {REDAUX}/[^A-Za-z] { yyextra->put_cp1252(yytext); } 110 | {SREDAUX} { yyextra->put_cp1252(yytext); } 111 | {DATE} { yyextra->put(yytext); } 112 | {NUMBER} { yyextra->put(yytext); } 113 | {FRAC} { yyextra->put_escape(yytext, '/'); } 114 | {FRAC2} { yyextra->put_cp1252(yytext); } 115 | {TBSPEC} { yyextra->put(yytext); } 116 | {THING3} { yyextra->put_escape(yytext, '/'); } 117 | {DOLSIGN} { yyextra->put(yytext); } 118 | {DOLSIGN2} { yyextra->put_cp1252(yytext); } 119 | {ABBREV1}/{SENTEND} { 
yyextra->put(yytext); /* TODO: reinstate this when i can figure out how to get flex/jflex case-insensitivity to be the same: unput('.'); */ /* return a period for next time */ } 120 | {ABBREV1} { yyextra->put(yytext); } 121 | {ABBREV2} { yyextra->put(yytext); } 122 | {ABBREV4}/{SPACE} { yyextra->put(yytext); } 123 | {ACRO}/{SPACENL} { yyextra->put(yytext); } 124 | {TBSPEC2}/{SPACENL} { yyextra->put(yytext); } 125 | {WORD}\./{INSENTP} { yyextra->put(yytext); } 126 | {PHONE} { yyextra->put(yytext); } 127 | {DBLQUOT}/[A-Za-z0-9$] { yyextra->put("``"); } 128 | {DBLQUOT} { yyextra->put("''"); } 129 | \+ { yyextra->put(yytext); } 130 | %|& { yyextra->put(yytext); } 131 | \~|\^ { yyextra->put(yytext); } 132 | \||\\|0x7f {} 133 | {OPBRAC} { yyextra->put("-LRB-"); } 134 | {CLBRAC} { yyextra->put("-RRB-"); } 135 | \{ { yyextra->put("-LCB-"); } 136 | \} { yyextra->put("-RCB-"); } 137 | \( { yyextra->put("-LRB-"); } 138 | \) { yyextra->put("-RRB-"); } 139 | {HYPHENS} { if (yyleng >= 3 && yyleng <= 4) yyextra->put("--"); else yyextra->put(yytext); } 140 | {LDOTS} { yyextra->put("..."); } 141 | {FNMARKS} { yyextra->put(yytext); } 142 | {ASTS} { yyextra->put_escape(yytext, '*'); } 143 | {INSENTP} { yyextra->put(yytext); } 144 | \.|\?|\! { yyextra->put(yytext); } 145 | = { yyextra->put(yytext); } 146 | \/ { yyextra->put_escape(yytext, '/'); } 147 | {HTHING}/[^a-zA-Z0-9.+] { yyextra->put(yytext); } 148 | {THING} { yyextra->put(yytext); } 149 | {THINGA} { yyextra->put_amp(yytext); } 150 | '[A-Za-z]. { yyextra->put("`"); yyless(1); /* invert quote - using trailing context didn't work.... */ } 151 | {REDAUX} { yyextra->put_cp1252(yytext); } 152 | {QUOTES} { yyextra->put_cp1252(yytext); } 153 | \0|{SPACE} { } 154 | \n|\r|\r\n { } 155 |   { } 156 | . 
{ yyextra->err(yytext); } 157 | 158 | %% 159 | 160 | /* tokenize: runs the lexer defined above over `in`. A tokenizer_out is constructed from `out` and this tokenizer and installed as the scanner's extra data, which the rule actions reach as yyextra (presumably appending tokens to `out` -- tokenizer_out is defined elsewhere). The scanner is created per call, fed in.c_str(), run once with yylex and then destroyed. NOTE(review): the result of yylex_init_extra is not checked before the scanner is used. NOTE(review): `std::vector` has no template argument here -- presumably std::vector<std::string> lost in extraction; verify against the declaration in pfp/tokenizer.h. */ void com::wavii::pfp::tokenizer::tokenize(const std::string & in, std::vector & out) const 161 | { 162 | yyscan_t scanner; 163 | tokenizer_out to(out, *this); 164 | yylex_init_extra( &to, &scanner ); 165 | yy_scan_string(in.c_str(), scanner); 166 | yylex(scanner); 167 | yylex_destroy(scanner); 168 | } -------------------------------------------------------------------------------- /etc/test/sample_input: -------------------------------------------------------------------------------- 1 | 151 -7.83245277404785200000 901 -2.45473408699035640000 5801 -1.45861506462097170000 303 -5.74620342254638700000 2220 -0.35667493939399720000 2 | 296 -14.35528278350830000000 161 -19.14327430725097700000 1759 -11.25418949127197300000 9779 -15.93632125854492200000 139 -18.75084304809570300000 77 -15.01969051361084000000 1702 -14.32996463775634800000 352 -12.35280227661132800000 48 -14.12983417510986300000 483 -16.55500411987304700000 306 -15.55933380126953100000 62 -15.26152324676513700000 277 -18.71525573730468800000 390 -17.79594039916992200000 270 -14.89177608489990200000 207 -17.42170715332031200000 3890 -18.86057662963867200000 303 -13.50388526916503900000 150 -14.86192321777343800000 13 -16.31678390502929700000 2946 -12.35280227661132800000 344 -13.95480537414550800000 1983 -14.02677822113037100000 6349 -13.96224021911621100000 2967 -14.80953788757324200000 193 -15.26816749572753900000 2940 -15.21500301361084000000 232 -15.14601039886474600000 172 -14.77267646789550800000 231 -13.94543266296386700000 372 -14.36873149871826200000 313 -12.40686893463134800000 625 -14.08740329742431600000 136 -14.19843578338623000000 5568 -13.65208530426025400000 109 -14.95959854125976600000 447 -15.03837966918945300000 6991 -15.31463241577148400000 5795 -12.64048385620117200000 1695 -13.45141410827636700000 2846 -18.39859771728515600000 19 -19.71650505065918000000 135 -13.20009994506836000000 169 -8.14065647125244100000 9798 
-14.58639430999755900000 170 -14.75603103637695300000 194 -13.43052577972412100000 901 -22.62596702575683600000 833 -15.21124362945556600000 8250 -15.47369766235351600000 8395 -15.88891887664795000000 1671 -15.70853710174560500000 1087 -18.81946563720703000000 9785 -14.08740329742431600000 4685 -16.38408851623535000000 87 -19.68233299255371000000 383 -15.09632682800293000000 243 -15.01538944244384800000 50 -14.74239826202392600000 210 -14.65093326568603500000 428 -17.57675552368164000000 181 -13.14125919342041000000 118 -15.79291534423828100000 361 -12.50695228576660200000 35 -19.11553192138672000000 57 -19.24915885925293000000 177 -19.31625556945800800000 353 -14.64421367645263700000 192 -11.25418949127197300000 2859 -15.76870536804199200000 25 -11.84197616577148400000 911 -18.63942146301269500000 3067 -15.98157787322998000000 336 -14.08740329742431600000 348 -14.34523200988769500000 165 -17.21002769470215000000 11337 -12.64048385620117200000 5533 -12.95893764495849600000 155 -14.01893520355224600000 1021 -16.19939804077148400000 267 -20.52100563049316400000 23 -12.23501873016357400000 212 -13.96224021911621100000 869 -15.03837966918945300000 273 -19.11128425598144500000 183 -14.42955207824707000000 74 -15.37829494476318400000 90 -21.99006462097168000000 55 -14.80953788757324200000 297 -15.75444412231445300000 2291 -5.36824321746826200000 5776 -13.45141410827636700000 148 -14.65248394012451200000 200 -18.96260070800781200000 2289 -15.47858715057373000000 125 -14.71992588043212900000 3294 -13.20009994506836000000 2854 -19.21660614013672000000 10611 -11.94733715057373000000 127 -15.98157787322998000000 287 -15.98157787322998000000 66 -15.41974735260009800000 5087 -12.35280227661132800000 256 -15.91609668731689500000 86 -17.96851158142090000000 2639 -18.97509574890136700000 1737 -16.56739616394043000000 137 -15.09180259704589800000 279 -13.34399414062500000000 123 -20.03589820861816400000 187 -14.44260597229003900000 151 -14.47100067138671900000 2725 
-17.40479278564453000000 88 -14.15679264068603500000 304 -15.25054264068603500000 1720 -20.42433357238769500000 39 -16.94454956054687500000 281 -20.25898170471191400000 856 -16.04662513732910000000 67 -13.23381137847900400000 408 -15.94093990325927700000 145 -15.15788841247558600000 2847 -18.89045906066894500000 1988 -13.65208530426025400000 174 -14.42630958557128900000 106 -14.28069400787353500000 636 -14.14456176757812500000 4697 -14.96776199340820300000 814 -17.52706718444824200000 382 -15.48102378845214800000 9788 -12.64048385620117200000 3 | 296 -14.34566783905029300000 161 -19.13365936279297000000 1759 -11.24457454681396500000 9779 -15.92670631408691400000 139 -18.74122810363769500000 77 -15.01007556915283200000 1702 -14.32034969329834000000 352 -12.34318733215332000000 48 -14.12021923065185500000 483 -16.54538917541504000000 306 -5.74930763244628900000 62 -15.25190830230712900000 277 -18.70564079284668000000 390 -17.78632545471191400000 270 -14.88216114044189500000 207 -17.41209220886230500000 3890 -18.85096168518066400000 303 -13.49427032470703100000 150 -14.85230827331543000000 13 -16.30717086791992200000 2946 -12.34318733215332000000 344 -13.94519042968750000000 1983 -14.01716327667236300000 6349 -13.95262527465820300000 2967 -14.79992294311523400000 193 -15.25855255126953100000 2940 -15.20538806915283200000 232 -15.13639545440673800000 172 -14.76306152343750000000 231 -4.07956600189209000000 372 -14.35911655426025400000 313 -12.39725399017334000000 625 -14.07778835296630900000 136 -14.18882083892822300000 5568 -13.64247035980224600000 109 -14.94998359680175800000 447 -15.02876472473144500000 6991 -15.30501747131347700000 5795 -12.63086891174316400000 1695 -13.44179916381836000000 2846 -18.38898277282715000000 19 -19.70689010620117200000 135 -13.19048500061035200000 169 -14.47373199462890600000 9798 -14.57677936553955000000 170 -14.74641609191894500000 194 -13.42091083526611300000 901 -22.61635208129882800000 833 -15.20162963867187500000 8250 
-15.46408271789550800000 8395 -15.87930393218994100000 1671 -15.69892215728759800000 1087 -18.80985069274902300000 9785 -14.07778835296630900000 4685 -16.37447357177734400000 87 -19.67271804809570300000 383 -15.08671188354492200000 243 -15.00577545166015600000 50 -14.73278331756591800000 210 -14.64131832122802700000 428 -17.56714057922363300000 181 -13.13164424896240200000 118 -15.78330039978027300000 361 -12.49733829498291000000 35 -19.10591697692871000000 108 -2.39997649192810060000 57 -19.23954391479492200000 177 -19.30664062500000000000 353 -14.63459873199462900000 192 -11.24457454681396500000 2859 -15.75909042358398400000 25 -11.83236122131347700000 911 -18.62980651855468800000 3067 -15.97196292877197300000 336 -14.07778835296630900000 348 -14.33561706542968800000 165 -17.20041275024414000000 11337 -12.63086891174316400000 5533 -12.94932270050048800000 155 -14.00932025909423800000 1021 -16.18978309631347700000 267 -20.51139068603515600000 23 -12.22540378570556600000 212 -13.95262527465820300000 869 -15.02876472473144500000 273 -19.10166931152343800000 183 -14.41993713378906200000 74 -15.36868095397949200000 90 -21.98044967651367200000 55 -14.79992294311523400000 297 -7.72088336944580100000 2291 -15.50254917144775400000 5776 -13.44179916381836000000 148 -14.64286994934082000000 200 -18.95298576354980500000 2289 -15.46897220611572300000 125 -14.71031093597412100000 3294 -13.19048500061035200000 2854 -19.20699119567871000000 10611 -11.93772220611572300000 127 -15.97196292877197300000 287 -15.97196292877197300000 66 -15.41013240814209000000 5087 -12.34318733215332000000 256 -15.90648174285888700000 86 -17.95889663696289000000 2639 -18.96548080444336000000 1737 -16.55778121948242200000 137 -15.08218765258789000000 279 -13.33437919616699200000 123 -20.02628326416015600000 187 -14.43299198150634800000 151 -14.46138572692871100000 2725 -17.39517784118652300000 88 -14.14717769622802700000 304 -15.24092769622802700000 1720 -20.41471862792968800000 39 
-16.93493461608886700000 281 -20.24936676025390600000 856 -16.03701019287109400000 67 -13.22419643402099600000 408 -15.93132495880127000000 145 -8.99955463409423800000 2847 -18.88084411621093800000 1988 -13.64247035980224600000 174 -14.41669464111328100000 106 -14.27107906341552700000 636 -14.13494682312011700000 4697 -14.95814704895019500000 814 -17.51745223999023400000 382 -15.47140884399414000000 9788 -12.63086891174316400000 4 | 86 -2.33385920524597170000 270 -3.63758611679077150000 68 -4.02535152435302700000 869 -1.29928302764892580000 281 -1.55204069614410400000 2846 -5.19849681854248050000 181 -1.41706597805023200000 118 -5.90148878097534200000 161 -2.73579287528991700000 1720 -5.38595342636108400000 428 -4.93627071380615200000 272 -2.30258512496948240000 160 -4.05178499221801800000 328 -2.55204606056213400000 38 -4.96284484863281250000 316 -1.20397281646728520000 1021 -7.58426475524902300000 368 -3.43398714065551760000 1877 -0.97516810894012450000 1236 -2.19722461700439450000 3890 -6.50777530670166000000 51 -3.05400109291076660000 375 -2.83321332931518550000 90 -1.85001885890960700000 856 -8.40335273742675800000 398 -5.51342868804931600000 185 -3.29318785667419430000 5 | 296 -14.35600948333740200000 161 -19.14400100708007800000 1759 -11.25491619110107400000 9779 -15.93704795837402300000 139 -18.75156784057617200000 77 -15.02041721343994100000 1702 -14.33069133758545000000 352 -12.35352897644043000000 48 -14.13055992126464800000 483 -16.55573081970215000000 306 -15.56006050109863300000 62 -15.26224899291992200000 277 -18.71598243713379000000 390 -17.79666709899902300000 270 -14.89250278472900400000 207 -17.42243194580078000000 3890 -18.86130332946777300000 303 -13.50461196899414000000 150 -14.86264991760253900000 13 -16.31751060485840000000 2946 -12.35352897644043000000 344 -13.95553112030029300000 1983 -14.02750492095947300000 6349 -13.96296691894531200000 2967 -14.81026458740234400000 193 -15.26889419555664000000 2940 -15.21572971343994100000 232 
-15.14673614501953100000 172 -14.77340316772461000000 231 -13.94615936279296900000 372 -14.36945724487304700000 313 -12.40759563446045000000 625 -14.08812999725341800000 136 -14.19916248321533200000 5568 -13.65281200408935500000 109 -14.96032524108886700000 447 -15.03910636901855500000 6991 -15.31535911560058600000 5795 -12.64121055603027300000 1695 -13.45214080810546900000 2846 -18.39932250976562500000 19 -19.71722984313965000000 135 -13.20082664489746100000 169 -14.48407363891601600000 9798 -14.58712100982666000000 170 -14.75675678253173800000 194 -13.43125247955322300000 901 -22.62669372558593800000 833 -15.21197032928466800000 8250 -15.47442436218261700000 8395 -15.88964557647705000000 1671 -15.70926380157470700000 1087 -18.82019233703613300000 9785 -14.08812999725341800000 4685 -16.38481521606445300000 87 -19.68305969238281200000 383 -15.09705352783203100000 243 -15.01611614227295000000 50 -14.74312496185302700000 210 -14.65165901184082000000 428 -17.57748222351074200000 181 -13.14198589324951200000 118 -15.79364109039306600000 361 -12.50767898559570300000 35 -19.11625862121582000000 57 -19.24988555908203000000 177 -19.31698036193847700000 353 -14.64494037628173800000 192 -11.25491619110107400000 2859 -15.76943206787109400000 25 -11.84270286560058600000 911 -18.64014625549316400000 3067 -15.98230361938476600000 336 -14.08812999725341800000 348 -14.34595870971679700000 165 -17.21075439453125000000 11337 -12.64121055603027300000 5533 -12.95966434478759800000 155 -14.01966190338134800000 1021 -16.20012283325195300000 267 -20.52173233032226600000 23 -12.23574542999267600000 212 -13.96296691894531200000 869 -15.03910636901855500000 273 -19.11201095581054700000 183 -14.43027877807617200000 74 -15.37902164459228500000 90 -21.99079132080078000000 55 -14.81026458740234400000 297 -15.75516986846923800000 2291 -15.51288986206054700000 5776 -13.45214080810546900000 148 -14.65321063995361300000 200 -18.96332740783691400000 2289 -15.47931385040283200000 125 
-14.72065258026123000000 3294 -13.20082664489746100000 2854 -19.21733283996582000000 10611 -11.94806385040283200000 127 -15.98230361938476600000 287 -15.98230361938476600000 66 -15.42047309875488300000 5087 -12.35352897644043000000 256 -15.91682338714599600000 86 -17.96923828125000000000 2639 -18.97582244873047000000 1737 -16.56812286376953000000 137 -15.09252929687500000000 279 -13.34472084045410200000 123 -20.03662490844726600000 187 -14.44333267211914000000 151 -8.70673274993896500000 2725 -17.40551948547363300000 88 -14.15751934051513700000 304 -15.25126934051513700000 1720 -20.42506027221679700000 39 -16.94527626037597700000 281 -20.25970649719238300000 856 -16.04734992980957000000 67 -13.23453712463378900000 408 -15.94166660308837900000 145 -15.15861511230468800000 2847 -18.89118576049804700000 1988 -13.65281200408935500000 174 -14.42703628540039000000 106 -14.28141975402832000000 636 -14.14528846740722700000 4697 -14.96848869323730500000 814 -17.52779388427734400000 382 -15.48174953460693400000 9788 -12.64121055603027300000 6 | 9649 -0.47957307100296020000 6452 0.00000000000000000000 6444 -0.40546509623527527000 123 -0.40654030442237854000 335 0.00000000000000000000 4699 -0.55961579084396360000 9788 -0.69314718246459960000 7 | 296 -14.34691429138183600000 161 -19.13490676879882800000 1759 -11.24582195281982400000 9779 -15.92795276641845700000 139 -18.74247360229492200000 77 -15.01132202148437500000 1702 -14.32159614562988300000 352 -12.34443378448486300000 48 -14.12146568298339800000 483 -16.54663658142090000000 306 -15.55096530914306600000 62 -15.25315475463867200000 277 -18.70688629150390600000 390 -17.78757286071777300000 270 -14.88340759277343800000 207 -17.41333770751953000000 3890 -18.85220909118652300000 303 -13.49551677703857400000 150 -14.85355472564697300000 13 -16.30841636657715000000 2946 -12.34443378448486300000 344 -13.94643688201904300000 1983 -14.01841068267822300000 6349 -13.95387172698974600000 2967 -14.80116939544677700000 193 
-15.25979900360107400000 2940 -15.20663452148437500000 232 -15.13764190673828100000 172 -14.76430797576904300000 231 -13.93706417083740200000 372 -14.36036300659179700000 313 -12.39850139617920000000 625 -14.07903480529785200000 136 -14.19006729125976600000 5568 -13.64371681213378900000 109 -14.95123004913330000000 447 -15.03001117706298800000 6991 -15.30626487731933600000 5795 -12.63211631774902300000 1695 -13.44304656982421900000 2846 -18.39022827148437500000 19 -19.70813560485840000000 135 -13.19173145294189500000 169 -14.47497844696045000000 9798 -14.57802581787109400000 170 -14.74766254425048800000 194 -13.42215728759765600000 901 -22.61759757995605500000 833 -15.20287609100341800000 8250 -15.46532917022705000000 8395 -15.88055038452148400000 1671 -15.70016860961914000000 1087 -18.81109619140625000000 9785 -14.07903480529785200000 4685 -16.37572097778320300000 87 -19.67396545410156200000 383 -15.08795833587646500000 243 -15.00702190399170000000 50 -14.73403072357177700000 210 -14.64256477355957000000 428 -17.56838607788086000000 181 -13.13289165496826200000 118 -15.78454685211181600000 361 -12.49858474731445300000 35 -19.10716247558593800000 57 -19.24079132080078000000 177 -19.30788612365722700000 353 -14.63584518432617200000 192 -11.24582195281982400000 2859 -15.76033782958984400000 25 -11.83360862731933600000 911 -18.63105201721191400000 3067 -15.97320938110351600000 336 -14.07903480529785200000 348 -14.33686351776123000000 165 -17.20165824890136700000 11337 -12.63211631774902300000 5533 -12.95056915283203100000 155 -14.01056671142578100000 1021 -16.19102859497070300000 267 -20.51263618469238300000 23 -12.22665119171142600000 212 -13.95387172698974600000 869 -15.03001117706298800000 273 -19.10291481018066400000 183 -14.42118358612060500000 74 -15.36992740631103500000 90 -21.98169708251953000000 55 -14.80116939544677700000 297 -15.74607563018798800000 2291 -15.50379562377929700000 5776 -13.44304656982421900000 148 -14.64411640167236300000 200 
-18.95423126220703000000 2289 -15.47021961212158200000 125 -14.71155738830566400000 3294 -13.19173145294189500000 2854 -19.20823669433593800000 10611 -11.93896865844726600000 127 -15.97320938110351600000 287 -15.97320938110351600000 66 -15.41137886047363300000 5087 -12.34443378448486300000 256 -15.90772819519043000000 86 -17.96014404296875000000 2639 -18.96672630310058600000 1737 -16.55902671813965000000 137 -15.08343410491943400000 279 -13.33562564849853500000 123 -20.02753067016601600000 187 -14.43423843383789000000 151 -14.46263313293457000000 2725 -17.39642524719238300000 88 -14.14842510223388700000 304 -15.24217414855957000000 1720 -20.41596412658691400000 39 -16.93618011474609400000 281 -20.25061225891113300000 856 -16.03825569152832000000 67 -13.22544288635253900000 408 -15.93257141113281200000 145 -7.74120235443115200000 2847 -18.88209152221679700000 1988 -13.64371681213378900000 174 -14.41794204711914000000 106 -14.27232551574707000000 636 -14.13619327545166000000 4697 -14.95939350128173800000 814 -17.51869773864746000000 382 -15.47265529632568400000 9788 -12.63211631774902300000 8 | 83 -0.00313381641171872600 2816 0.00000000000000000000 2381 -0.00000000000000011102 281 -7.14849328994751000000 3397 -0.00000000000000011102 887 0.00000000000000000000 365 -0.01050452049821615200 254 -0.01600034162402153000 7238 0.00000000000000000000 9260 0.00000000000000000000 294 0.00000000000000000000 90 -7.90266132354736300000 5107 -0.00000000000000011102 2851 0.00000000000000000000 9 | 11 -0.00169964833185076710 87 -8.42814350128173800000 12421 -0.00000000000000011102 6442 -0.00000000000000011102 10 | 106 -4.41279840469360350000 256 -6.19943237304687500000 306 -10.05134677886962900000 210 -5.69932794570922850000 304 -3.67818045616149900000 170 -3.96793055534362800000 169 -7.31979131698608400000 11 | 86 -3.21781468391418460000 68 -4.02535152435302700000 281 -2.26002550125122070000 161 -7.19593715667724600000 2251 -3.34403896331787100000 1720 -3.71482205390930180000 272 
-2.30258512496948240000 160 -4.05178499221801800000 328 -4.34380531311035200000 168 -3.19867300987243650000 1877 -2.76049661636352540000 3890 -4.08002710342407200000 656 -1.09861230850219730000 90 -2.46024370193481450000 185 -5.93224525451660200000 12 | 151 -10.77689170837402300000 869 -3.09104251861572270000 5774 -1.60943794250488280000 8202 0.00000000000000000000 851 -3.53368663787841800000 901 -1.41897511482238770000 2834 -0.78922098875045780000 212 -3.80666255950927730000 3150 0.00000000000000000000 5548 -2.25129175186157230000 67 -5.19849681854248050000 375 -1.73460102081298830000 243 -1.96944069862365720000 1656 -1.60943794250488280000 293 -1.25276291370391850000 3311 -2.19722461700439450000 13 | 296 -14.34774494171142600000 161 -19.13573646545410000000 1759 -11.24665164947509800000 9779 -15.92878341674804700000 139 -18.74330329895019500000 77 -15.01215267181396500000 1702 -14.32242679595947300000 352 -12.34526443481445300000 48 -14.12229537963867200000 483 -16.54746627807617200000 306 -15.55179595947265600000 62 -15.25398540496826200000 277 -18.70771789550781200000 390 -17.78840255737304700000 270 -14.88423824310302700000 207 -17.41416740417480500000 3890 -18.85303878784179700000 303 -13.49634742736816400000 150 -14.85438537597656200000 13 -16.30924606323242200000 2946 -12.34526443481445300000 344 -13.94726657867431600000 1983 -14.01924037933349600000 6349 -13.95470237731933600000 2967 -14.80200004577636700000 193 -15.26062965393066400000 2940 -15.20746517181396500000 232 -15.13847160339355500000 172 -14.76513862609863300000 231 -13.93789482116699200000 372 -14.36119270324707000000 313 -12.39933109283447300000 625 -14.07986545562744100000 136 -14.19089794158935500000 5568 -13.64454746246337900000 109 -14.95206069946289000000 447 -15.03084182739257800000 6991 -15.30709457397461000000 5795 -12.63294601440429700000 1695 -13.44387626647949200000 2846 -18.39105987548828000000 19 -19.70896720886230500000 135 -13.19256210327148400000 169 -14.47580909729003900000 
9798 -14.57885646820068400000 170 -14.74849224090576200000 194 -13.42298793792724600000 901 -22.61842918395996000000 833 -15.20370578765869100000 8250 -15.46615982055664000000 8395 -15.88138103485107400000 1671 -15.70099925994873000000 1087 -18.81192779541015600000 9785 -14.07986545562744100000 4685 -16.37655067443847700000 87 -19.67479515075683600000 383 -15.08878898620605500000 243 -15.00785160064697300000 50 -14.73486042022705000000 210 -14.64339447021484400000 428 -17.56921768188476600000 181 -13.13372135162353500000 118 -15.78537654876709000000 361 -12.49941444396972700000 35 -19.10799407958984400000 57 -19.24162101745605500000 177 -19.30871772766113300000 353 -14.63667583465576200000 192 -11.24665164947509800000 2859 -15.76116752624511700000 25 -11.83443832397461000000 911 -18.63188362121582000000 3067 -15.97404003143310500000 336 -14.07986545562744100000 348 -14.33769416809082000000 165 -17.20248985290527300000 11337 -12.63294601440429700000 5533 -12.95139980316162100000 155 -14.01139736175537100000 1021 -16.19185829162597700000 267 -20.51346778869629000000 23 -12.22748088836670000000 212 -13.95470237731933600000 869 -15.03084182739257800000 273 -19.10374641418457000000 183 -14.42201423645019500000 74 -15.37075710296630900000 90 -21.98252677917480500000 55 -14.80200004577636700000 297 -15.74690532684326200000 2291 -15.50462532043457000000 5776 -13.44387626647949200000 148 -14.64494609832763700000 200 -18.95506286621093800000 2289 -15.47104930877685500000 125 -14.71238803863525400000 3294 -13.19256210327148400000 2854 -19.20906829833984400000 10611 -11.93979930877685500000 127 -15.97404003143310500000 287 -15.97404003143310500000 66 -15.41220855712890600000 5087 -12.34526443481445300000 256 -15.90855884552002000000 86 -17.96097373962402300000 2639 -18.96755790710449200000 1737 -16.55985832214355500000 137 -15.08426475524902300000 279 -13.33645629882812500000 123 -20.02836036682129000000 187 -14.43506813049316400000 151 -7.60194778442382800000 2725 
-17.39725494384765600000 88 -14.14925479888916000000 304 -15.24300479888916000000 1720 -20.41679573059082000000 39 -16.93701171875000000000 281 -20.25144195556640600000 856 -16.03908538818359400000 67 -13.22627258300781200000 408 -15.93340206146240200000 145 -15.15035057067871100000 2847 -18.88292121887207000000 1988 -13.64454746246337900000 174 -14.41877174377441400000 106 -14.27315521240234400000 636 -14.13702392578125000000 4697 -14.96022415161132800000 814 -17.51952934265136700000 382 -15.47348594665527300000 9788 -12.63294601440429700000 14 | 296 -14.40812492370605500000 161 -19.19611740112304700000 1759 -11.30703258514404300000 9779 -15.98916339874267600000 139 -18.80368423461914000000 77 -15.07253265380859400000 1702 -14.38280677795410200000 352 -12.40564441680908200000 48 -14.18267631530761700000 483 -16.60784721374511700000 306 -15.61217594146728500000 62 -15.31436538696289000000 277 -18.76809692382812500000 390 -17.84878349304199200000 270 -14.94461822509765600000 207 -17.47454833984375000000 3890 -18.91341972351074200000 303 -13.55672836303711000000 150 -14.91476535797119100000 13 -16.36962699890136700000 2946 -12.40564441680908200000 344 -14.00764751434326200000 1983 -14.07962131500244100000 6349 -14.01508235931396500000 2967 -14.86238002777099600000 193 -15.32100963592529300000 2940 -15.26784515380859400000 232 -15.19885253906250000000 172 -14.82551860809326200000 231 -13.99827575683593800000 372 -14.42157363891601600000 313 -12.45971202850341800000 625 -14.14024543762207000000 136 -14.25127792358398400000 5568 -13.70492744445800800000 109 -15.01244068145752000000 447 -15.09122180938720700000 6991 -15.36747550964355500000 5795 -12.69332695007324200000 1695 -13.50425720214843800000 2846 -18.45143890380859400000 19 -19.76934623718261700000 135 -13.25294208526611300000 169 -10.94003677368164000000 9798 -14.63923645019531200000 170 -14.80887317657470700000 194 -13.48336791992187500000 901 -22.67880821228027300000 833 -15.26408672332763700000 8250 
-15.52653980255127000000 8395 -15.94176101684570300000 1671 -15.76137924194336000000 1087 -18.87230682373047000000 9785 -14.14024543762207000000 4685 -16.43693161010742200000 87 -19.73517608642578000000 383 -15.14916896820068400000 243 -15.06823253631591800000 50 -14.79524135589599600000 210 -14.70377540588378900000 428 -17.62959671020507800000 181 -13.19410228729248000000 118 -15.84575748443603500000 361 -12.55979537963867200000 35 -19.16837501525879000000 57 -19.30200195312500000000 177 -19.36909675598144500000 353 -14.69705677032470700000 192 -11.30703258514404300000 2859 -15.82154846191406200000 25 -11.89481925964355500000 911 -18.69226264953613300000 3067 -16.03442001342773400000 336 -14.14024543762207000000 348 -14.39807510375976600000 165 -17.26286888122558600000 11337 -12.69332695007324200000 5533 -13.01178073883056600000 155 -14.07177734375000000000 1021 -16.25223922729492200000 267 -20.57384681701660000000 23 -12.28786182403564500000 212 -14.01508235931396500000 869 -15.09122180938720700000 273 -19.16412544250488300000 183 -14.48239421844482400000 74 -15.43113803863525400000 90 -22.04290771484375000000 55 -14.86238002777099600000 297 -15.80728626251220700000 2291 -15.56500625610351600000 5776 -13.50425720214843800000 148 -14.70532703399658200000 200 -19.01544380187988300000 2289 -15.53143024444580000000 125 -14.77276802062988300000 3294 -13.25294208526611300000 2854 -19.26944732666015600000 10611 -12.00017929077148400000 127 -16.03442001342773400000 287 -16.03442001342773400000 66 -15.47258949279785200000 5087 -12.40564441680908200000 256 -15.96893882751464800000 86 -18.02135467529297000000 2639 -19.02793693542480500000 1737 -16.62023735046386700000 137 -15.14464473724365200000 279 -13.39683628082275400000 123 -20.08874130249023400000 187 -14.49544906616211000000 151 -14.52384376525878900000 2725 -17.45763587951660000000 88 -14.20963573455810500000 304 -15.30338478088378900000 1720 -20.47717475891113300000 39 -16.99739265441894500000 281 
-20.31182289123535000000 856 -16.09946632385254000000 67 -13.28665351867675800000 408 -15.99378204345703100000 145 -11.18618392944336000000 2847 -18.94330215454101600000 1988 -13.70492744445800800000 174 -14.47915267944336000000 106 -14.33353614807128900000 636 -14.19740390777587900000 4697 -15.02060413360595700000 814 -17.57990837097168000000 382 -15.53386592864990200000 9788 -12.69332695007324200000 15 | 296 -14.34585666656494100000 161 -19.13384819030761700000 844 -1.35219681262969970000 1759 -11.24476432800293000000 9779 -15.92689514160156200000 139 -18.74141693115234400000 77 -15.01026439666748000000 1702 -14.32053852081298800000 352 -12.34337615966796900000 48 -14.12040805816650400000 483 -16.54557800292968800000 306 -15.54990768432617200000 62 -15.25209712982177700000 277 -18.70582962036132800000 390 -17.78651428222656200000 270 -14.88234996795654300000 207 -17.41228103637695300000 3890 -18.85115051269531200000 303 -13.49445915222168000000 150 -14.85249710083007800000 13 -16.30735969543457000000 2946 -12.34337615966796900000 558 -4.93554449081420900000 438 -2.48717689514160160000 344 -13.94537925720214800000 1983 -14.01735305786132800000 6349 -13.95281410217285200000 2967 -14.80011177062988300000 193 -15.25874137878418000000 2940 -15.20557689666748000000 232 -15.13658428192138700000 172 -14.76325035095214800000 231 -13.93600654602050800000 372 -14.35930538177490200000 313 -12.39744377136230500000 4057 -4.48527288436889650000 625 -14.07797718048095700000 136 -14.18900966644287100000 2316 -1.94818031787872310000 5568 -13.64265918731689500000 109 -14.95017242431640600000 447 -15.02895355224609400000 6991 -15.30520725250244100000 5795 -12.63105869293212900000 1695 -13.44198894500732400000 2846 -18.38917160034179700000 19 -19.70707893371582000000 135 -13.19067382812500000000 169 -14.47392082214355500000 9798 -14.57696819305420000000 170 -14.74660491943359400000 194 -13.42109966278076200000 901 -22.61654090881347700000 833 -15.20181846618652300000 8250 
-15.46427154541015600000 8395 -15.87949275970459000000 1671 -15.69911098480224600000 689 -4.27640771865844700000 1087 -18.81003952026367200000 9785 -14.07797718048095700000 4685 -16.37466239929199200000 87 -19.67290687561035000000 383 -15.08690071105957000000 681 -4.71098089218139650000 243 -15.00596427917480500000 50 -14.73297309875488300000 210 -14.64150714874267600000 428 -17.56732940673828000000 181 -13.13183307647705000000 118 -15.78348922729492200000 361 -12.49752712249755900000 35 -19.10610580444336000000 57 -19.23973274230957000000 177 -19.30682945251465000000 353 -14.63478755950927700000 192 -11.24476432800293000000 2859 -15.75927925109863300000 25 -11.83255100250244100000 911 -18.62999534606933600000 3067 -2.65021467208862300000 336 -14.07797718048095700000 348 -14.33580589294433600000 165 -17.20060157775879000000 11337 -12.63105869293212900000 5533 -12.94951152801513700000 155 -14.00950908660888700000 1021 -16.18997192382812500000 267 -20.51157951354980500000 23 -12.22559356689453100000 212 -13.95281410217285200000 869 -15.02895355224609400000 273 -19.10185813903808600000 183 -14.42012596130371100000 74 -15.36886978149414000000 90 -21.98063850402832000000 552 -6.93812704086303700000 55 -14.80011177062988300000 297 -15.74501800537109400000 3484 -0.36999493837356570000 2291 -15.50273799896240200000 5776 -13.44198894500732400000 148 -14.64305877685546900000 200 -18.95317459106445300000 2289 -15.46916198730468800000 125 -14.71049976348877000000 3294 -13.19067382812500000000 2854 -19.20718002319336000000 10611 -11.93791103363037100000 127 -15.97215175628662100000 287 -15.97215175628662100000 66 -15.41032123565673800000 5087 -12.34337615966796900000 256 -15.90667057037353500000 86 -17.95908546447754000000 2639 -18.96566963195800800000 1737 -16.55797004699707000000 137 -15.08237648010253900000 279 -13.33456802368164000000 123 -20.02647209167480500000 187 -14.43318080902099600000 151 -14.46157550811767600000 2725 -17.39536666870117200000 88 
-14.14736747741699200000 304 -15.24111652374267600000 1720 -20.41490745544433600000 39 -16.93512344360351600000 281 -20.24955558776855500000 855 -1.79402959346771240000 856 -16.03719902038574200000 67 -13.22438526153564500000 408 -15.93151378631591800000 145 -15.14846324920654300000 2847 -18.88103294372558600000 1988 -13.64265918731689500000 174 -14.41688442230224600000 106 -14.27126789093017600000 636 -14.13513565063476600000 4697 -14.95833587646484400000 814 -17.51764106750488300000 382 -15.47159767150878900000 9788 -12.63105869293212900000 16 | 77 -11.17236518859863300000 306 -16.24706840515136700000 172 -20.45269012451172000000 372 -20.04874420166015600000 136 -19.87845039367675800000 169 -14.35131454467773400000 170 -20.43604469299316400000 833 -20.89125823974609400000 118 -15.35219955444336000000 297 -16.11398696899414000000 148 -20.33249855041504000000 256 -21.59611129760742200000 137 -11.44262981414795000000 151 -20.15101432800293000000 88 -19.83680725097656200000 304 -20.93055725097656200000 145 -14.55738830566406200000 174 -13.27150344848632800000 17 | 86 -4.03158998489379900000 68 -4.02535152435302700000 2185 -1.09861230850219730000 281 -3.14229702949523930000 2846 -3.29425954818725600000 161 -7.88908433914184600000 272 -2.30258512496948240000 160 -4.05178499221801800000 168 -3.89182019233703600000 38 -3.86423230171203600000 1877 -1.32724535465240480000 90 -0.66556012630462650000 856 -7.71020507812500000000 398 -2.29455280303955100000 185 -4.22749710083007800000 18 | 77 -11.17236518859863300000 306 -16.24706840515136700000 172 -20.45269012451172000000 372 -20.04874420166015600000 136 -19.87845039367675800000 169 -14.35131454467773400000 170 -20.43604469299316400000 833 -20.89125823974609400000 118 -15.35219955444336000000 297 -16.11398696899414000000 148 -20.33249855041504000000 256 -21.59611129760742200000 137 -11.44262981414795000000 151 -20.15101432800293000000 88 -19.83680725097656200000 304 -20.93055725097656200000 145 -14.55738830566406200000 174 
-13.27150344848632800000 19 | 3478 0.00000000000000022204 6248 -0.00000000000000011102 1730 0.00000000000000000000 830 0.00000000000000000000 5062 0.00000000000000000000 5081 0.00000000000000000000 2643 0.00000000000000000000 2971 -0.28768208622932434000 3303 0.00000000000000000000 450 0.00000000000000022204 2184 0.00000000000000000000 5594 0.00000000000000022204 867 0.00000000000000022204 1879 0.00000000000000000000 2324 0.00000000000000000000 679 0.00000000000000000000 712 -0.00025043825735338030 552 -0.00097276270389556880 3076 -0.00000000000000022204 5752 0.00000000000000000000 20 | 151 -10.77689170837402300000 869 -3.09104251861572270000 5774 -1.60943794250488280000 8202 0.00000000000000000000 851 -3.53368663787841800000 901 -1.41897511482238770000 2834 -0.78922098875045780000 212 -3.80666255950927730000 3150 0.00000000000000000000 5548 -2.25129175186157230000 67 -5.19849681854248050000 375 -1.73460102081298830000 243 -1.96944069862365720000 1656 -1.60943794250488280000 293 -1.25276291370391850000 3311 -2.19722461700439450000 21 | 296 -14.35306549072265600000 161 -19.14105606079101600000 1759 -11.25197219848632800000 9779 -15.93410396575927700000 139 -18.74862480163574200000 77 -15.01747322082519500000 1702 -14.32774734497070300000 352 -12.35058498382568400000 48 -14.12761688232421900000 483 -16.55278587341308600000 306 -8.96084690093994100000 62 -15.25930595397949200000 277 -18.71303749084472700000 390 -17.79372406005859400000 270 -14.88955879211425800000 207 -17.41948890686035000000 3890 -18.85836029052734400000 303 -13.50166797637939500000 150 -14.85970592498779300000 13 -16.31456756591797000000 2946 -12.35058498382568400000 344 -13.95258808135986300000 1983 -14.02456092834472700000 6349 -13.96002292633056600000 2967 -14.80732059478759800000 193 -15.26595020294189500000 2940 -15.21278572082519500000 232 -15.14379310607910200000 172 -14.77045917510986300000 231 -13.94321537017822300000 372 -14.36651420593261700000 313 -12.40465164184570300000 625 
-14.08518600463867200000 136 -14.19621849060058600000 5568 -13.64986801147461000000 109 -14.95738124847412100000 447 -15.03616237640380900000 6991 -15.31241512298584000000 5795 -12.63826656341552700000 1695 -13.44919681549072300000 2846 -18.39637947082519500000 19 -19.71428680419922000000 135 -13.19788265228271500000 169 -10.88497638702392600000 9798 -14.58417701721191400000 170 -14.75381374359130900000 194 -13.42830848693847700000 901 -22.62374877929687500000 833 -15.20902633666992200000 8250 -15.47148036956787100000 8395 -15.88670158386230500000 1671 -15.70631980895996100000 1087 -18.81724739074707000000 9785 -14.08518600463867200000 4685 -16.38187026977539000000 87 -19.68011665344238300000 383 -15.09410953521728500000 243 -15.01317214965820300000 50 -14.74018096923828100000 210 -14.64871597290039000000 428 -17.57453727722168000000 181 -13.13904190063476600000 118 -15.79069805145263700000 361 -12.50473499298095700000 35 -19.11331367492675800000 57 -19.24694252014160000000 177 -19.31403732299804700000 353 -14.64199638366699200000 192 -11.25197219848632800000 2859 -15.76648807525634800000 25 -11.83975887298584000000 911 -18.63720321655273400000 3067 -15.97936058044433600000 336 -14.08518600463867200000 348 -14.34301471710205000000 165 -17.20780944824218800000 11337 -12.63826656341552700000 5533 -12.95672035217285200000 155 -14.01671791076660200000 1021 -16.19717979431152300000 267 -20.51878738403320300000 23 -12.23280143737793000000 212 -13.96002292633056600000 869 -15.03616237640380900000 273 -19.10906600952148400000 183 -14.42733478546142600000 74 -15.37607765197753900000 90 -21.98784637451172000000 55 -14.80732059478759800000 297 -8.82623863220214800000 2291 -15.50994682312011700000 5776 -13.44919681549072300000 148 -14.65026664733886700000 200 -18.96038246154785000000 2289 -15.47636985778808600000 125 -14.71770858764648400000 3294 -13.19788265228271500000 2854 -19.21438789367675800000 10611 -11.94511985778808600000 127 -15.97936058044433600000 287 
-15.97936058044433600000 66 -15.41753005981445300000 5087 -12.35058498382568400000 256 -15.91387939453125000000 86 -17.96629524230957000000 2639 -18.97287750244140600000 1737 -16.56517791748047000000 137 -15.08958530426025400000 279 -13.34177684783935500000 123 -20.03368186950683600000 187 -14.44038867950439500000 151 -14.46878337860107400000 2725 -17.40257453918457000000 88 -14.15457534790039000000 304 -15.24832534790039000000 1720 -20.42211532592773400000 39 -16.94233131408691400000 281 -20.25676345825195300000 856 -16.04440689086914000000 67 -13.23159408569336000000 408 -15.93872261047363300000 145 -9.06744384765625000000 2847 -18.88824272155761700000 1988 -13.64986801147461000000 174 -14.42409229278564500000 106 -14.27847671508789000000 636 -14.14234447479248000000 4697 -14.96554470062255900000 814 -17.52484893798828000000 382 -15.47880649566650400000 9788 -12.63826656341552700000 22 | 77 -16.79975318908691400000 306 -17.33939743041992200000 172 -16.55274009704590000000 372 -16.14879417419433600000 136 -15.97849941253662100000 169 -12.88356685638427700000 170 -16.53609466552734400000 833 -16.99130821228027300000 118 -17.57297897338867200000 297 -17.53450775146484400000 148 -16.43254852294922000000 256 -17.69615936279297000000 137 -16.87186622619629000000 151 -16.25106430053711000000 88 -15.93685626983642600000 304 -17.03060531616211000000 145 -12.18087291717529300000 174 -16.20637321472168000000 23 | 83 -0.00313381641171872600 2816 0.00000000000000000000 2381 -0.00000000000000011102 281 -7.14849328994751000000 3397 -0.00000000000000011102 887 0.00000000000000000000 365 -0.01050452049821615200 254 -0.01600034162402153000 7238 0.00000000000000000000 9260 0.00000000000000000000 294 0.00000000000000000000 90 -7.90266132354736300000 5107 -0.00000000000000011102 2851 0.00000000000000000000 24 | 151 -10.77689170837402300000 869 -3.09104251861572270000 5774 -1.60943794250488280000 8202 0.00000000000000000000 851 -3.53368663787841800000 901 -1.41897511482238770000 2834 
-0.78922098875045780000 212 -3.80666255950927730000 3150 0.00000000000000000000 5548 -2.25129175186157230000 67 -5.19849681854248050000 375 -1.73460102081298830000 243 -1.96944069862365720000 1656 -1.60943794250488280000 293 -1.25276291370391850000 3311 -2.19722461700439450000 25 | 296 -14.34812164306640600000 161 -19.13611412048340000000 1759 -11.24702930450439500000 9779 -15.92916011810302700000 139 -18.74368095397949200000 77 -15.01252937316894500000 1702 -14.32280349731445300000 352 -12.34564113616943400000 48 -14.12267303466796900000 483 -16.54784393310547000000 306 -15.55217266082763700000 62 -15.25436210632324200000 277 -18.70809364318847700000 390 -17.78878021240234400000 270 -14.88461494445800800000 207 -17.41454505920410000000 3890 -18.85341644287109400000 303 -13.49672412872314500000 150 -5.75222301483154300000 13 -16.30962371826172000000 2946 -12.34564113616943400000 344 -13.94764423370361300000 1983 -14.01961803436279300000 6349 -13.95507907867431600000 2967 -14.80237674713134800000 193 -15.26100635528564500000 2940 -15.20784187316894500000 232 -15.13884925842285200000 172 -14.76551532745361300000 231 -13.93827152252197300000 372 -14.36157035827636700000 313 -12.39970874786377000000 625 -14.08024215698242200000 136 -14.19127464294433600000 5568 -13.64492416381836000000 109 -14.95243740081787100000 447 -15.03121852874755900000 6991 -15.30747222900390600000 5795 -12.63332366943359400000 1695 -13.44425392150878900000 2846 -18.39143562316894500000 19 -19.70934295654297000000 135 -13.19293880462646500000 169 -14.47618579864502000000 9798 -14.57923316955566400000 170 -14.74886989593505900000 194 -13.42336463928222700000 901 -22.61880493164062500000 833 -15.20408344268798800000 8250 -15.46653652191162100000 8395 -15.88175773620605500000 1671 -15.70137596130371100000 1087 -18.81230354309082000000 9785 -14.08024215698242200000 4685 -16.37692832946777300000 87 -19.67517280578613300000 383 -15.08916568756103500000 243 -15.00822925567627000000 50 
-14.73523807525634800000 210 -14.64377212524414000000 428 -17.56959342956543000000 181 -13.13409900665283200000 118 -15.78575420379638700000 361 -12.49979209899902300000 35 -19.10836982727050800000 57 -19.24199867248535000000 177 -19.30909347534179700000 353 -14.63705253601074200000 192 -11.24702930450439500000 2859 -15.76154518127441400000 25 -11.83481597900390600000 911 -18.63225936889648400000 3067 -15.97441673278808600000 336 -14.08024215698242200000 348 -14.33807182312011700000 165 -17.20286560058593800000 11337 -12.63332366943359400000 5533 -12.95177745819091800000 155 -14.01177406311035200000 1021 -16.19223594665527300000 267 -20.51384353637695300000 23 -12.22785854339599600000 212 -13.95507907867431600000 869 -15.03121852874755900000 273 -19.10412216186523400000 183 -14.42239093780517600000 74 -15.37113475799560500000 90 -21.98290443420410000000 55 -14.80237674713134800000 297 -15.74728298187255900000 2291 -15.50500297546386700000 5776 -13.44425392150878900000 148 -14.64532375335693400000 200 -18.95544052124023400000 2289 -5.32750558853149400000 125 -14.71276473999023400000 3294 -13.19293880462646500000 2854 -19.20944404602050800000 10611 -11.94017601013183600000 127 -15.97441673278808600000 287 -15.97441673278808600000 66 -15.41258621215820300000 5087 -12.34564113616943400000 256 -15.90893554687500000000 86 -17.96135139465332000000 2639 -18.96793365478515600000 1737 -16.56023406982422000000 137 -15.08464145660400400000 279 -13.33683300018310500000 123 -20.02873802185058600000 187 -14.43544578552246100000 151 -14.46384048461914000000 2725 -17.39763259887695300000 88 -14.14963245391845700000 304 -15.24338150024414000000 1720 -20.41717147827148400000 39 -16.93738746643066400000 281 -20.25181961059570300000 856 -16.03946304321289000000 67 -13.22665023803711000000 408 -15.93377876281738300000 145 -8.12290096282959000000 2847 -18.88329887390136700000 1988 -13.64492416381836000000 174 -14.41914939880371100000 106 -14.27353286743164000000 636 
-14.13740062713623000000 4697 -14.96060085296630900000 814 -17.51990509033203000000 382 -15.47386264801025400000 9788 -12.63332366943359400000 26 | 296 -14.34590911865234400000 161 -6.28196668624877900000 1759 -11.24481678009033200000 9779 -15.92694759368896500000 139 -18.74146842956543000000 77 -15.01031780242920000000 1702 -14.32059192657470700000 352 -12.34342861175537100000 48 -14.12046051025390600000 483 -16.54563140869140600000 306 -15.54996013641357400000 62 -15.25214958190918000000 277 -18.70588302612304700000 390 -17.78656768798828000000 270 -14.88240242004394500000 207 -17.41233253479004000000 3890 -18.85120391845703000000 303 -13.49451255798339800000 150 -14.85254955291748000000 13 -16.30741119384765600000 2946 -12.34342861175537100000 344 -13.94543170928955000000 1983 -14.01740550994873000000 6349 -13.95286655426025400000 2967 -14.80016422271728500000 193 -15.25879478454589800000 2940 -15.20562934875488300000 232 -15.13663673400878900000 172 -14.76330280303955000000 231 -13.93605995178222700000 372 -14.35935783386230500000 313 -12.39749622344970700000 625 -14.07802963256836000000 136 -14.18906307220459000000 5568 -13.64271163940429700000 109 -14.95022583007812500000 185 -5.52910280227661100000 447 -15.02900600433349600000 6991 -15.30525970458984400000 5795 -12.63111114501953100000 1695 -13.44204139709472700000 2846 -18.38922309875488300000 19 -19.70713043212890600000 135 -13.19072723388671900000 169 -14.47397422790527300000 9798 -14.57702159881591800000 170 -14.74665737152099600000 194 -13.42115306854248000000 901 -22.61659240722656200000 833 -15.20187091827392600000 8250 -15.46432399749755900000 8395 -15.87954521179199200000 1671 -15.69916439056396500000 38 -4.96516752243042000000 1087 -18.81009101867675800000 9785 -14.07802963256836000000 4685 -16.37471580505371000000 87 -19.67296028137207000000 383 -15.08695316314697300000 243 -15.00601673126220700000 50 -14.73302555084228500000 210 -14.64155960083007800000 428 -17.56738281250000000000 181 
-13.13188648223877000000 118 -9.66281223297119100000 361 -12.49757957458496100000 35 -19.10615921020507800000 57 -19.23978614807129000000 177 -19.30688095092773400000 353 -14.63484096527099600000 192 -11.24481678009033200000 2859 -15.75933265686035200000 25 -11.83260345458984400000 911 -18.63004684448242200000 3067 -15.97220420837402300000 336 -14.07802963256836000000 348 -14.33585929870605500000 165 -17.20065307617187500000 11337 -12.63111114501953100000 5533 -12.94956493377685500000 155 -14.00956249237060500000 1021 -16.19002342224121000000 267 -20.51163101196289000000 23 -12.22564601898193400000 212 -13.95286655426025400000 869 -15.02900600433349600000 273 -19.10190963745117200000 183 -14.42017841339111300000 74 -15.36892223358154300000 90 -7.48010063171386700000 55 -14.80016422271728500000 297 -15.74507045745849600000 2291 -15.50279045104980500000 5776 -13.44204139709472700000 148 -14.64311122894287100000 200 -18.95322799682617200000 2289 -15.46921443939209000000 125 -14.71055221557617200000 3294 -13.19072723388671900000 2854 -19.20723152160644500000 10611 -11.93796348571777300000 127 -15.97220420837402300000 287 -15.97220420837402300000 66 -15.41037368774414000000 5087 -12.34342861175537100000 256 -15.90672397613525400000 86 -8.79598140716552700000 2639 -18.96572113037109400000 1737 -16.55802345275879000000 137 -15.08242893218994100000 279 -13.33462142944336000000 123 -20.02652549743652300000 187 -14.43323326110839800000 151 -14.46162796020507800000 2725 -17.39542007446289000000 88 -14.14741992950439500000 304 -15.24116897583007800000 1720 -20.41495895385742200000 39 -16.93517684936523400000 281 -6.72473049163818400000 856 -16.03725051879882800000 67 -13.22443771362304700000 408 -15.93156719207763700000 145 -15.14851570129394500000 2847 -18.88108634948730500000 1988 -13.64271163940429700000 174 -14.41693687438964800000 106 -14.27132034301757800000 636 -14.13518810272216800000 4697 -14.95838832855224600000 814 -17.51769447326660000000 382 
-15.47165012359619100000 9788 -12.63111114501953100000 27 | 151 -10.08374404907226600000 35 -5.55875682830810550000 655 -0.69314718246459960000 2294 -2.39789533615112300000 215 -2.07944154739379900000 851 -2.97407078742980960000 901 -0.64952290058135990000 2834 -3.26575946807861330000 5548 -0.86499744653701780000 5801 -0.26469254493713380000 303 -5.74620342254638700000 11166 -0.00000000000000011102 145 -11.83282566070556600000 511 -1.31567680835723880000 173 -0.00000000000000011102 1656 -3.05635690689086900000 169 -10.21016311645507800000 2220 -1.60943794250488280000 3311 -0.11778303235769272000 28 | 77 -18.71667671203613300000 306 -15.34916496276855500000 172 -18.46966171264648400000 372 -18.06571769714355500000 136 -15.18816661834716800000 169 -15.07688522338867200000 170 -18.45301628112793000000 833 -18.90822982788086000000 118 -14.28217315673828100000 297 -19.45142936706543000000 148 -18.34947013854980500000 256 -19.61308288574218800000 137 -18.78878784179687500000 151 -11.58827304840087900000 88 -17.85377883911132800000 304 -18.94752883911132800000 145 -15.04901313781738300000 174 -14.65895557403564500000 29 | 296 -14.34585666656494100000 161 -19.13384819030761700000 844 -1.35219681262969970000 1759 -11.24476432800293000000 9779 -15.92689514160156200000 139 -18.74141693115234400000 77 -15.01026439666748000000 1702 -14.32053852081298800000 352 -12.34337615966796900000 48 -14.12040805816650400000 483 -16.54557800292968800000 306 -15.54990768432617200000 62 -15.25209712982177700000 277 -18.70582962036132800000 390 -17.78651428222656200000 270 -14.88234996795654300000 207 -17.41228103637695300000 3890 -18.85115051269531200000 303 -13.49445915222168000000 150 -14.85249710083007800000 13 -16.30735969543457000000 2946 -12.34337615966796900000 558 -4.93554449081420900000 438 -2.48717689514160160000 344 -13.94537925720214800000 1983 -14.01735305786132800000 6349 -13.95281410217285200000 2967 -14.80011177062988300000 193 -15.25874137878418000000 2940 
-15.20557689666748000000 232 -15.13658428192138700000 172 -14.76325035095214800000 231 -13.93600654602050800000 372 -14.35930538177490200000 313 -12.39744377136230500000 4057 -4.48527288436889650000 625 -14.07797718048095700000 136 -14.18900966644287100000 2316 -1.94818031787872310000 5568 -13.64265918731689500000 109 -14.95017242431640600000 447 -15.02895355224609400000 6991 -15.30520725250244100000 5795 -12.63105869293212900000 1695 -13.44198894500732400000 2846 -18.38917160034179700000 19 -19.70707893371582000000 135 -13.19067382812500000000 169 -14.47392082214355500000 9798 -14.57696819305420000000 170 -14.74660491943359400000 194 -13.42109966278076200000 901 -22.61654090881347700000 833 -15.20181846618652300000 8250 -15.46427154541015600000 8395 -15.87949275970459000000 1671 -15.69911098480224600000 689 -4.27640771865844700000 1087 -18.81003952026367200000 9785 -14.07797718048095700000 4685 -16.37466239929199200000 87 -19.67290687561035000000 383 -15.08690071105957000000 681 -4.71098089218139650000 243 -15.00596427917480500000 50 -14.73297309875488300000 210 -14.64150714874267600000 428 -17.56732940673828000000 181 -13.13183307647705000000 118 -15.78348922729492200000 361 -12.49752712249755900000 35 -19.10610580444336000000 57 -19.23973274230957000000 177 -19.30682945251465000000 353 -14.63478755950927700000 192 -11.24476432800293000000 2859 -15.75927925109863300000 25 -11.83255100250244100000 911 -18.62999534606933600000 3067 -2.65021467208862300000 336 -14.07797718048095700000 348 -14.33580589294433600000 165 -17.20060157775879000000 11337 -12.63105869293212900000 5533 -12.94951152801513700000 155 -14.00950908660888700000 1021 -16.18997192382812500000 267 -20.51157951354980500000 23 -12.22559356689453100000 212 -13.95281410217285200000 869 -15.02895355224609400000 273 -19.10185813903808600000 183 -14.42012596130371100000 74 -15.36886978149414000000 90 -21.98063850402832000000 552 -6.93812704086303700000 55 -14.80011177062988300000 297 
-15.74501800537109400000 3484 -0.36999493837356570000 2291 -15.50273799896240200000 5776 -13.44198894500732400000 148 -14.64305877685546900000 200 -18.95317459106445300000 2289 -15.46916198730468800000 125 -14.71049976348877000000 3294 -13.19067382812500000000 2854 -19.20718002319336000000 10611 -11.93791103363037100000 127 -15.97215175628662100000 287 -15.97215175628662100000 66 -15.41032123565673800000 5087 -12.34337615966796900000 256 -15.90667057037353500000 86 -17.95908546447754000000 2639 -18.96566963195800800000 1737 -16.55797004699707000000 137 -15.08237648010253900000 279 -13.33456802368164000000 123 -20.02647209167480500000 187 -14.43318080902099600000 151 -14.46157550811767600000 2725 -17.39536666870117200000 88 -14.14736747741699200000 304 -15.24111652374267600000 1720 -20.41490745544433600000 39 -16.93512344360351600000 281 -20.24955558776855500000 855 -1.79402959346771240000 856 -16.03719902038574200000 67 -13.22438526153564500000 408 -15.93151378631591800000 145 -15.14846324920654300000 2847 -18.88103294372558600000 1988 -13.64265918731689500000 174 -14.41688442230224600000 106 -14.27126789093017600000 636 -14.13513565063476600000 4697 -14.95833587646484400000 814 -17.51764106750488300000 382 -15.47159767150878900000 9788 -12.63105869293212900000 30 | 1671 -0.24965468049049377000 29 -0.72391885519027710000 3396 0.00000000000000000000 281 -10.61422920227050800000 2725 -0.06838385760784149000 3105 -1.17272031307220460000 2846 -6.45126008987426800000 2251 -4.44265127182006800000 650 -0.05715841427445412000 3492 -0.40546509623527527000 1721 -0.19415602087974548000 390 -2.75756168365478500000 519 -0.64535653591156010000 2362 -0.19237188994884490000 160 -4.74493217468261700000 767 -1.27160251140594480000 2967 -1.15745282173156740000 90 -9.34958076477050800000 856 -7.01705789566040000000 6239 -3.68887948989868160000 31 | 86 -4.17864322662353500000 281 -2.59395861625671400000 118 -9.66268920898437500000 161 -7.88908433914184600000 1720 
-5.40894317626953100000 2715 -3.29583692550659200000 160 -4.74493217468261700000 328 -4.34380531311035200000 168 -3.89182019233703600000 1877 -3.62069797515869140000 3890 -5.40916299819946300000 51 -4.66343927383422850000 90 -3.39828705787658700000 398 -4.41481637954711900000 32 | 383 -6.32704353332519500000 2940 -4.65396022796630900000 136 -2.83989429473876950000 313 -3.63758611679077150000 395 -0.91629076004028320000 145 -6.92755079269409200000 372 -4.97208833694458000000 177 -6.19026279449462900000 33 | 296 -14.34656715393066400000 161 -19.13455963134765600000 1759 -11.24547481536865200000 9779 -15.92760562896728500000 139 -18.74212646484375000000 77 -15.01097488403320300000 1702 -14.32124900817871100000 352 -12.34408664703369100000 48 -14.12111854553222700000 483 -16.54628944396972700000 306 -15.55061817169189500000 62 -15.25280761718750000000 277 -18.70653915405273400000 390 -17.78722572326660000000 270 -14.88306045532226600000 207 -17.41299057006836000000 3890 -18.85186195373535000000 303 -13.49516963958740200000 150 -6.44370460510253900000 13 -16.30806922912597700000 2946 -12.34408664703369100000 344 -13.94608974456787100000 1983 -14.01806354522705000000 6349 -13.95352458953857400000 2967 -14.80082225799560500000 193 -15.25945186614990200000 2940 -15.20628738403320300000 232 -15.13729476928711000000 172 -14.76396083831787100000 231 -13.93671703338623000000 372 -14.36001586914062500000 313 -12.39815425872802700000 625 -14.07868766784668000000 136 -14.18972015380859400000 5568 -13.64336967468261700000 109 -14.95088291168212900000 447 -15.02966403961181600000 6991 -15.30591773986816400000 5795 -12.63176918029785200000 1695 -13.44269847869873000000 2846 -18.38988113403320300000 19 -19.70778846740722700000 135 -13.19138431549072300000 169 -14.47463130950927700000 9798 -14.57767868041992200000 170 -14.74731540679931600000 194 -13.42181015014648400000 901 -22.61725044250488300000 833 -15.20252895355224600000 8250 -15.46498203277587900000 8395 
-15.88020324707031200000 1671 -15.69982147216796900000 1087 -18.81074905395507800000 9785 -14.07868766784668000000 4685 -16.37537384033203000000 87 -19.67361831665039000000 383 -15.08761119842529300000 243 -15.00667476654052700000 50 -14.73368358612060500000 210 -14.64221763610839800000 428 -17.56803894042968800000 181 -13.13254356384277300000 118 -15.78419971466064500000 361 -12.49823760986328100000 35 -19.10681533813476600000 57 -19.24044418334961000000 177 -19.30753898620605500000 353 -14.63549804687500000000 192 -11.24547481536865200000 2859 -15.75998973846435500000 25 -11.83326148986816400000 911 -18.63070487976074200000 3067 -15.97286224365234400000 336 -14.07868766784668000000 348 -14.33651638031005900000 165 -17.20131111145019500000 11337 -12.63176918029785200000 5533 -12.95022201538086000000 155 -14.01021957397461000000 1021 -16.19068145751953000000 267 -20.51228904724121000000 23 -12.22630405426025400000 212 -13.95352458953857400000 869 -15.02966403961181600000 273 -19.10256767272949200000 183 -14.42083644866943400000 74 -15.36958026885986300000 90 -21.98134994506836000000 55 -14.80082225799560500000 297 -15.74572849273681600000 2291 -15.50344848632812500000 5776 -13.44269847869873000000 148 -14.64376926422119100000 200 -18.95388412475586000000 2289 -15.46987247467041000000 125 -14.71121025085449200000 3294 -13.19138431549072300000 2854 -19.20788955688476600000 10611 -11.93862152099609400000 127 -15.97286224365234400000 287 -15.97286224365234400000 66 -15.41103172302246100000 5087 -12.34408664703369100000 256 -15.90738105773925800000 86 -17.95979690551757800000 2639 -18.96637916564941400000 1737 -16.55867958068847700000 137 -15.08308696746826200000 279 -13.33527851104736300000 123 -20.02718353271484400000 187 -14.43389129638671900000 151 -14.46228599548339800000 2725 -17.39607620239257800000 88 -14.14807796478271500000 304 -15.24182701110839800000 1720 -20.41561698913574200000 39 -16.93583297729492200000 281 -20.25026512145996000000 856 
-16.03790855407715000000 67 -13.22509574890136700000 408 -5.38285160064697300000 145 -7.66085910797119100000 2847 -18.88174438476562500000 1988 -13.64336967468261700000 174 -14.41759490966796900000 106 -14.27197837829589800000 636 -14.13584613800048800000 4697 -14.95904636383056600000 814 -17.51835060119629000000 382 -15.47230815887451200000 9788 -12.63176918029785200000 34 | 8205 0.00000000000000022204 6841 -0.16705408692359924000 891 -0.02090108208358287800 3472 -0.08338160812854767000 6237 -0.00364868785254657270 6681 -0.00000000000000011102 2664 -0.37469345331192017000 858 -0.09531018137931824000 1646 -0.81093019247055050000 3335 -0.00000000000000011102 1883 -5.43372201919555700000 8255 -0.69314718246459960000 5082 -0.69314718246459960000 570 -0.28768208622932434000 3073 -0.15485416352748870000 755 -0.00245002214796841140 4364 -0.05715841427445412000 2831 -0.34294474124908450000 35 | 412 0.00000000000000000000 36 | -------------------------------------------------------------------------------- /include/moost/http.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_HPP__ 2 | #define __MOOST_HTTP_HPP__ 3 | 4 | // convenience header to include subclasses 5 | #include 6 | #include 7 | 8 | #endif // __MOOST_HTTP_HPP__ 9 | -------------------------------------------------------------------------------- /include/moost/http/connection.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_CONNECTION_HPP__ 2 | #define __MOOST_HTTP_CONNECTION_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "moost/http/reply.hpp" 12 | #include "moost/http/request.hpp" 13 | #include "moost/http/request_handler_base.hpp" 14 | #include "moost/http/request_parser.hpp" 15 | 16 | namespace moost { namespace http { 17 | 18 | /// Represents a single connection from a client. 
19 | template 20 | class connection 21 | : public boost::enable_shared_from_this< connection >, 22 | private boost::noncopyable 23 | { 24 | public: 25 | /// Construct a connection with the given io_service. 26 | explicit connection(boost::asio::io_service& io_service, 27 | request_handler_base& handler); 28 | 29 | /// Get the socket associated with the connection. 30 | boost::asio::ip::tcp::socket& socket(); 31 | 32 | /// Start the first asynchronous operation for the connection. 33 | void start(); 34 | 35 | private: 36 | /// Handle completion of a read operation. 37 | void handle_read(const boost::system::error_code& e, 38 | std::size_t bytes_transferred); 39 | 40 | /// Handle completion of a write operation. 41 | void handle_write(const boost::system::error_code& e); 42 | 43 | /// Strand to ensure the connection's handlers are not called concurrently. 44 | boost::asio::io_service::strand strand_; 45 | 46 | /// Socket for the connection. 47 | boost::asio::ip::tcp::socket socket_; 48 | 49 | /// The handler used to process the incoming request. 50 | request_handler_base& request_handler_; 51 | 52 | /// Buffer for incoming data. 53 | boost::array buffer_; 54 | 55 | /// The incoming request. 56 | request request_; 57 | 58 | /// The parser for the incoming request. 59 | request_parser request_parser_; 60 | 61 | /// The reply to be sent back to the client. 
62 | reply reply_; 63 | }; 64 | 65 | template 66 | connection::connection(boost::asio::io_service& io_service, 67 | request_handler_base& handler) 68 | : strand_(io_service), 69 | socket_(io_service), 70 | request_handler_(handler) 71 | { 72 | } 73 | 74 | template 75 | void connection::start() 76 | { 77 | socket_.async_read_some(boost::asio::buffer(buffer_), 78 | strand_.wrap( 79 | boost::bind(&connection::handle_read, connection::shared_from_this(), 80 | boost::asio::placeholders::error, 81 | boost::asio::placeholders::bytes_transferred))); 82 | } 83 | 84 | template 85 | void connection::handle_read(const boost::system::error_code& e, 86 | std::size_t bytes_transferred) 87 | { 88 | if (!e) 89 | { 90 | boost::tribool result; 91 | boost::tie(result, boost::tuples::ignore) = request_parser_.parse( 92 | request_, buffer_.data(), buffer_.data() + bytes_transferred); 93 | 94 | if (result) 95 | { 96 | request_handler_.handle_request_base(request_, reply_); 97 | boost::asio::async_write(socket_, reply_.to_buffers(), 98 | strand_.wrap( 99 | boost::bind(&connection::handle_write, connection::shared_from_this(), 100 | boost::asio::placeholders::error))); 101 | } 102 | else if (!result) 103 | { 104 | reply_ = reply::stock_reply(reply::bad_request); 105 | boost::asio::async_write(socket_, reply_.to_buffers(), 106 | strand_.wrap( 107 | boost::bind(&connection::handle_write, connection::shared_from_this(), 108 | boost::asio::placeholders::error))); 109 | } 110 | else 111 | { 112 | socket_.async_read_some(boost::asio::buffer(buffer_), 113 | strand_.wrap( 114 | boost::bind(&connection::handle_read, connection::shared_from_this(), 115 | boost::asio::placeholders::error, 116 | boost::asio::placeholders::bytes_transferred))); 117 | } 118 | } 119 | 120 | // If an error occurs then no new asynchronous operations are started. 
This 121 | // means that all shared_ptr references to the connection object will 122 | // disappear and the object will be destroyed automatically after this 123 | // handler returns. The connection class's destructor closes the socket. 124 | } 125 | 126 | template 127 | boost::asio::ip::tcp::socket& connection::socket() 128 | { 129 | return socket_; 130 | } 131 | 132 | template 133 | void connection::handle_write(const boost::system::error_code& e) 134 | { 135 | if (!e) 136 | { 137 | // Initiate graceful connection closure. 138 | boost::system::error_code ignored_ec; 139 | socket_.shutdown(boost::asio::ip::tcp::socket::shutdown_both, ignored_ec); 140 | } 141 | 142 | // No new asynchronous operations are started. This means that all shared_ptr 143 | // references to the connection object will disappear and the object will be 144 | // destroyed automatically after this handler returns. The connection class's 145 | // destructor closes the socket. 146 | } 147 | 148 | }} // moost::http 149 | 150 | #endif // __MOOST_HTTP_CONNECTION_HPP__ 151 | -------------------------------------------------------------------------------- /include/moost/http/header.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_HEADER_HPP__ 2 | #define __MOOST_HTTP_HEADER_HPP__ 3 | 4 | #include 5 | 6 | namespace moost { namespace http { 7 | 8 | struct header 9 | { 10 | std::string name; 11 | std::string value; 12 | }; 13 | 14 | }} // moost::http 15 | 16 | #endif // __MOOST_HTTP_HEADER_HPP__ 17 | -------------------------------------------------------------------------------- /include/moost/http/mime_types.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_MIME_TYPES_HPP__ 2 | #define __MOOST_HTTP_MIME_TYPES_HPP__ 3 | 4 | #include 5 | 6 | namespace moost { namespace http { namespace mime_types { 7 | 8 | /// Convert a file extension into a MIME type. 
9 | std::string extension_to_type(const std::string& extension); 10 | 11 | }}} // moost::http::mime_types 12 | 13 | #endif // __MOOST_HTTP_MIME_TYPES_HPP__ 14 | -------------------------------------------------------------------------------- /include/moost/http/reply.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_REPLY_HPP__ 2 | #define __MOOST_HTTP_REPLY_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "moost/http/header.hpp" 9 | 10 | namespace moost { namespace http { 11 | 12 | /// A reply to be sent to a client. 13 | struct reply 14 | { 15 | /// The status of the reply. 16 | enum status_type 17 | { 18 | ok = 200, 19 | created = 201, 20 | accepted = 202, 21 | no_content = 204, 22 | multiple_choices = 300, 23 | moved_permanently = 301, 24 | moved_temporarily = 302, 25 | not_modified = 304, 26 | bad_request = 400, 27 | unauthorized = 401, 28 | forbidden = 403, 29 | not_found = 404, 30 | internal_server_error = 500, 31 | not_implemented = 501, 32 | bad_gateway = 502, 33 | service_unavailable = 503 34 | } status; 35 | 36 | /// The headers to be included in the reply. 37 | std::vector
headers; 38 | 39 | /// The content to be sent in the reply. 40 | std::string content; 41 | 42 | /// Convert the reply into a vector of buffers. The buffers do not own the 43 | /// underlying memory blocks, therefore the reply object must remain valid and 44 | /// not be changed until the write operation has completed. 45 | std::vector to_buffers(); 46 | 47 | /// Get a stock reply. 48 | static reply stock_reply(status_type status); 49 | }; 50 | 51 | }} // moost::http 52 | 53 | #endif // __MOOST_HTTP_REPLY_HPP__ 54 | -------------------------------------------------------------------------------- /include/moost/http/request.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_REQUEST_HPP__ 2 | #define __MOOST_HTTP_REQUEST_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "moost/http/header.hpp" 10 | 11 | namespace moost { namespace http { 12 | 13 | /// A request received from a client. 14 | struct request 15 | { 16 | std::string method; 17 | std::string uri; 18 | int http_version_major; 19 | int http_version_minor; 20 | std::vector
headers; 21 | std::string content; // the content (body) of the request 22 | 23 | std::vector
::iterator find_header(const std::string & header_name) 24 | { 25 | std::vector
::iterator result; 26 | for (result = headers.begin(); result != headers.end(); ++result) 27 | { 28 | if (boost::algorithm::iequals(result->name, header_name)) 29 | break; 30 | } 31 | return result; 32 | } 33 | }; 34 | 35 | }} // moost::http 36 | 37 | #endif // __MOOST_HTTP_REQUEST_HPP__ 38 | -------------------------------------------------------------------------------- /include/moost/http/request_handler_base.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_REQUEST_HANDLER_BASE_HPP__ 2 | #define __MOOST_HTTP_REQUEST_HANDLER_BASE_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "moost/http/reply.hpp" 9 | #include "moost/http/request.hpp" 10 | 11 | namespace moost { namespace http { 12 | 13 | /// the common handler for all incoming requests. 14 | template 15 | struct request_handler_base 16 | : private boost::noncopyable 17 | { 18 | /// handle a request, configure set up headers, pass on to handle_request 19 | void handle_request_base(const request& req, reply& rep) 20 | { 21 | rep.status = reply::ok; 22 | rep.headers.resize(2); 23 | rep.headers[0].name = "Content-Length"; 24 | rep.headers[1].name = "Content-Type"; 25 | rep.headers[1].value = "text/plain"; 26 | 27 | static_cast< RequestHandler * >(this)->handle_request(req, rep); 28 | 29 | rep.headers[0].value = boost::lexical_cast(rep.content.size()); 30 | } 31 | 32 | void handle_request(const request& req, reply& rep) 33 | { 34 | // default base implementation does nothing 35 | } 36 | }; 37 | 38 | }} // moost::http 39 | 40 | #endif // __MOOST_HTTP_REQUEST_HANDLER_BASE_HPP__ 41 | -------------------------------------------------------------------------------- /include/moost/http/request_parser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_REQUEST_PARSER_HPP__ 2 | #define __MOOST_HTTP_REQUEST_PARSER_HPP__ 3 | 4 | #include 5 | #include 6 | 7 | #include 
"moost/http/request.hpp" 8 | 9 | namespace moost { namespace http { 10 | 11 | /// Parser for incoming requests. 12 | class request_parser 13 | { 14 | public: 15 | /// Construct ready to parse the request method. 16 | request_parser(); 17 | 18 | /// Reset to initial parser state. 19 | void reset(); 20 | 21 | /// Parse some data. The tribool return value is true when a complete request 22 | /// has been parsed, false if the data is invalid, indeterminate when more 23 | /// data is required. The InputIterator return value indicates how much of the 24 | /// input has been consumed. 25 | template 26 | boost::tuple parse(request& req, 27 | InputIterator begin, InputIterator end) 28 | { 29 | boost::tribool result = boost::indeterminate; 30 | if (state_ != header_end) 31 | { 32 | while (begin != end) 33 | { 34 | result = consume_header(req, *begin++); 35 | if (result) 36 | break; 37 | else if (!result) 38 | return boost::make_tuple(result, begin); // something wrong in the header 39 | } 40 | } 41 | if (state_ == header_end) 42 | result = consume_body(req, begin, end); 43 | return boost::make_tuple(result, begin); 44 | } 45 | 46 | template 47 | boost::tribool consume_body(request & req, InputIterator begin, InputIterator end) 48 | { 49 | if (content_to_read_ < 0) 50 | return false; // probably bad content-length 51 | int content_read = std::min(static_cast(end - begin), content_to_read_); 52 | req.content.append(begin, begin + content_read); 53 | begin += content_read; 54 | content_to_read_ -= content_read; 55 | if (content_to_read_ == 0) 56 | return true; 57 | else 58 | return boost::indeterminate; 59 | } 60 | 61 | private: 62 | 63 | /// find and parse the content-length header 64 | boost::tribool parse_content_length(request & req); 65 | 66 | /// Handle the next character of input. 67 | boost::tribool consume_header(request& req, char input); 68 | 69 | /// Check if a byte is an HTTP character. 
70 | static bool is_char(int c); 71 | 72 | /// Check if a byte is an HTTP control character. 73 | static bool is_ctl(int c); 74 | 75 | /// Check if a byte is defined as an HTTP tspecial character. 76 | static bool is_tspecial(int c); 77 | 78 | /// Check if a byte is a digit. 79 | static bool is_digit(int c); 80 | 81 | /// The current state of the parser. 82 | enum state 83 | { 84 | method_start, 85 | method, 86 | uri_start, 87 | uri, 88 | http_version_h, 89 | http_version_t_1, 90 | http_version_t_2, 91 | http_version_p, 92 | http_version_slash, 93 | http_version_major_start, 94 | http_version_major, 95 | http_version_minor_start, 96 | http_version_minor, 97 | expecting_newline_1, 98 | header_line_start, 99 | header_lws, 100 | header_name, 101 | space_before_header_value, 102 | header_value, 103 | expecting_newline_2, 104 | expecting_newline_3, 105 | header_end 106 | } state_; 107 | 108 | int content_to_read_; 109 | }; 110 | 111 | }} // moost::http 112 | 113 | #endif // __MOOST_HTTP_REQUEST_PARSER_HPP__ 114 | -------------------------------------------------------------------------------- /include/moost/http/server.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOOST_HTTP_SERVER_HPP__ 2 | #define __MOOST_HTTP_SERVER_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "moost/http/connection.hpp" 14 | #include "moost/http/request_handler_base.hpp" 15 | 16 | namespace moost { namespace http { 17 | 18 | /// The top-level class of the HTTP server. 19 | template 20 | class server 21 | : private boost::noncopyable 22 | { 23 | public: 24 | /// Construct the server to listen on the specified TCP address and port, and 25 | /// serve up files from the given directory. 
26 | explicit server(const std::string& address, int port, 27 | std::size_t thread_pool_size); 28 | 29 | RequestHandler & request_handler() 30 | { 31 | return request_handler_; 32 | } 33 | 34 | /// Run the server's io_service loop. 35 | void run(); 36 | 37 | /// Stop the server. 38 | void stop(); 39 | 40 | private: 41 | /// Handle completion of an asynchronous accept operation. 42 | void handle_accept(const boost::system::error_code& e); 43 | 44 | /// The number of threads that will call io_service::run(). 45 | std::size_t thread_pool_size_; 46 | 47 | /// The io_service used to perform asynchronous operations. 48 | boost::asio::io_service io_service_; 49 | 50 | /// Acceptor used to listen for incoming connections. 51 | boost::asio::ip::tcp::acceptor acceptor_; 52 | 53 | /// The handler for all incoming requests. 54 | RequestHandler request_handler_; 55 | 56 | /// The next connection to be accepted. 57 | boost::shared_ptr< connection > new_connection_; 58 | 59 | /// The endpoint of the address to bind 60 | boost::asio::ip::tcp::endpoint endpoint_; 61 | }; 62 | 63 | template 64 | server::server(const std::string& address, int port, 65 | std::size_t thread_pool_size) 66 | : thread_pool_size_(thread_pool_size), 67 | acceptor_(io_service_), 68 | request_handler_(), 69 | new_connection_(new connection(io_service_, request_handler_)) 70 | { 71 | boost::asio::ip::tcp::resolver resolver(io_service_); 72 | boost::asio::ip::tcp::resolver::query query(address, boost::lexical_cast(port)); 73 | endpoint_ = *resolver.resolve(query); 74 | } 75 | 76 | template 77 | void server::handle_accept(const boost::system::error_code& e) 78 | { 79 | if (!e) 80 | { 81 | new_connection_->start(); 82 | new_connection_.reset(new connection(io_service_, request_handler_)); 83 | } 84 | acceptor_.async_accept(new_connection_->socket(), 85 | boost::bind(&server::handle_accept, this, boost::asio::placeholders::error)); 86 | } 87 | 88 | template 89 | void server::run() 90 | { 91 | // Open the acceptor 
with the option to reuse the address (i.e. SO_REUSEADDR). 92 | acceptor_.open(endpoint_.protocol()); 93 | acceptor_.set_option(boost::asio::ip::tcp::acceptor::reuse_address(true)); 94 | acceptor_.bind(endpoint_); 95 | acceptor_.listen(); 96 | 97 | // pump the first async accept into the loop 98 | acceptor_.async_accept(new_connection_->socket(), 99 | boost::bind(&server::handle_accept, this, 100 | boost::asio::placeholders::error)); 101 | 102 | // Create a pool of threads to run all of the io_services. 103 | std::vector > threads; 104 | for (std::size_t i = 0; i < thread_pool_size_; ++i) 105 | { 106 | boost::shared_ptr thread(new boost::thread( 107 | boost::bind(&boost::asio::io_service::run, &io_service_))); 108 | threads.push_back(thread); 109 | } 110 | 111 | // Wait for all threads in the pool to exit. 112 | for (std::size_t i = 0; i < threads.size(); ++i) 113 | threads[i]->join(); 114 | } 115 | 116 | template 117 | void server::stop() 118 | { 119 | io_service_.stop(); 120 | } 121 | 122 | }} // moost::http 123 | 124 | #endif // __MOOST_HTTP_SERVER_HPP__ 125 | -------------------------------------------------------------------------------- /include/pfp/binary_grammar.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __BINARY_GRAMMAR_HPP__ 2 | #define __BINARY_GRAMMAR_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace com { namespace wavii { namespace pfp { 15 | 16 | // binary rules have a left child, right child, parent, weight 17 | // such as: VBD...PP => VP, 100 18 | // weights represent the preference for their respective rule 19 | class binary_grammar 20 | { 21 | public: 22 | 23 | struct relationship 24 | { 25 | state_t left; 26 | state_t rite; 27 | state_score_t result; 28 | relationship() {} 29 | relationship(state_t left_, state_t rite_, state_score_t result_) : left(left_), rite(rite_), result(result_) {} 30 | }; 31 | 32 | 
struct cmp_left 33 | { 34 | bool operator()( const relationship & lhs, const relationship & rhs ) const { return lhs.left < rhs.left; } 35 | }; 36 | 37 | struct cmp_rite 38 | { 39 | bool operator()( const relationship & lhs, const relationship & rhs ) const { return lhs.rite < rhs.rite; } 40 | }; 41 | 42 | typedef std::vector< relationship >::const_iterator const_iterator; 43 | 44 | private: 45 | 46 | static const state_t boundary = 412; 47 | const state_list & m_states; 48 | std::vector< std::vector< relationship > > m_rules; 49 | std::vector< std::vector< relationship > > m_rules_parent; 50 | std::vector< relationship > m_boundary_rules; 51 | 52 | public: 53 | 54 | binary_grammar(const state_list & states) : m_states(states) {} 55 | 56 | binary_grammar(const state_list & states, const std::string & path) 57 | : m_states(states) 58 | { 59 | std::ifstream in(path.c_str()); 60 | load(in); 61 | } 62 | 63 | void load(std::istream & in) 64 | { 65 | m_rules.clear(); m_rules.resize(m_states.size()); 66 | m_rules_parent.clear(); m_rules_parent.resize(m_states.size()); 67 | m_boundary_rules.clear(); 68 | // stream is => [ left right parent score ] 69 | std::vector< size_t > sizes(m_states.size(), 0); 70 | std::vector< size_t > sizes_parent(m_states.size(), 0); 71 | std::vector< relationship > relationships; 72 | size_t boundary_size = 0; 73 | float f; 74 | relationship rel; 75 | // we want the potential space used here to be as tight as possible 76 | // to increase our cacheability. 
so precompute all our sizes up front 77 | // so that we avoid resizing dynamically 78 | while (in >> rel.left >> rel.rite >> rel.result.state >> f) 79 | { 80 | rel.result.score = static_cast(f * consts::score_resolution); 81 | relationships.push_back(rel); 82 | if (!m_states[rel.left].synthetic && !m_states[rel.rite].synthetic) 83 | ++boundary_size; 84 | else 85 | ++sizes[rel.left], ++sizes_parent[rel.result.state]; 86 | } 87 | for (state_t i = 0; i != m_states.size(); ++i) 88 | { 89 | if (sizes[i] > 0) 90 | m_rules[i].reserve(sizes[i]); 91 | if (sizes_parent[i] > 0) 92 | m_rules_parent.reserve(sizes_parent[i]); 93 | } 94 | // boundary rules are a small set of rules that result in only a few top-level states 95 | // (such as S, S-FRAG, and so on) being able to combine with the boundary symbol to produce the goal state. 96 | m_boundary_rules.reserve(boundary_size); 97 | for (std::vector::const_iterator it = relationships.begin(); it != relationships.end(); ++it) 98 | { 99 | if (!m_states[it->left].synthetic && !m_states[it->rite].synthetic) 100 | m_boundary_rules.push_back(*it); 101 | else 102 | { 103 | m_rules[it->left].push_back(*it); 104 | m_rules_parent[it->result.state].push_back(*it); 105 | } 106 | } 107 | // sort our rules for better cache hitting when operating on them 108 | for (state_t i = 0; i != m_states.size(); ++i) 109 | { 110 | std::sort(m_rules[i].begin(), m_rules[i].end(), cmp_rite()); 111 | std::sort(m_rules_parent[i].begin(), m_rules_parent[i].end(), cmp_left()); 112 | } 113 | } 114 | 115 | const std::vector< relationship > & get_rules(state_t left) const 116 | { 117 | return m_rules[left]; 118 | } 119 | 120 | const std::vector< relationship > & get_rules_parent(state_t parent) const 121 | { 122 | return m_rules_parent[parent]; 123 | } 124 | 125 | const_iterator boundary_begin() const 126 | { 127 | return m_boundary_rules.begin(); 128 | } 129 | 130 | const_iterator boundary_end() const 131 | { 132 | return m_boundary_rules.end(); 133 | } 134 | }; 
135 | 136 | }}} // com::wavii::pfp 137 | 138 | #endif // __BINARY_GRAMMAR_HPP__ 139 | -------------------------------------------------------------------------------- /include/pfp/config.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONFIG_H__ 2 | #define __CONFIG_H__ 3 | 4 | namespace com { namespace wavii { namespace pfp { 5 | 6 | // typedefs and consts 7 | 8 | typedef unsigned short state_t; // currently around 12,000 distinct states 9 | typedef unsigned short word_t; // around 47,000 words in our lexicon 10 | typedef float count_t; // counts in our lexicon (just keep float for easy manipulation) 11 | typedef short score_t; // we don't need full float resolution, use short for memory+speed gain 12 | typedef unsigned char pos_t; // word position in a sentence, never more than 100 13 | 14 | struct consts 15 | { 16 | static const char * version; // version of pfp engine 17 | static const float score_resolution; // spread scores more evenly across a downcast space 18 | static const score_t empty_score; // hasn't been scored 19 | static const count_t smooth_threshold; // significance threshold for words we haven't seen enough to build our lexicon 20 | static const float word_smooth_factor; // add this much smoothing to word weights 21 | static const float sig_smooth_factor; // add this much smoothing to signature weights 22 | static const state_t goal_state; // our goal state, which we strive to attain 23 | static const state_t boundary_state; // the boundary state marks the end of a sentence 24 | static const score_t epsilon; // an almost-or-totally insignificant amount 25 | }; 26 | 27 | }}} // com::wavii::pfp 28 | 29 | #endif // __CONFIG_H__ 30 | -------------------------------------------------------------------------------- /include/pfp/lexicon.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __LEXICON_HPP__ 2 | #define __LEXICON_HPP__ 3 | 4 | #include 5 | #include
6 | #include 7 | #include 8 | #include 9 | 10 | // to get libicu goodness 11 | #include 12 | // for ostream support 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | namespace com { namespace wavii { namespace pfp { 21 | 22 | // provide a conditional probability P(state | word) 23 | // we try to calculate the conditional probability by looking 24 | // the word up in our lexicon. if we can't find the word, we 25 | // try looking up a simple, high-level signature instead 26 | class lexicon 27 | { 28 | private: 29 | 30 | const state_list & m_states; 31 | std::vector< std::vector< std::pair< state_t, count_t > > > m_word_state; 32 | std::vector< std::vector< std::pair< state_t, count_t > > > m_sig_state; 33 | boost::unordered_map< std::string, word_t > m_word_index; 34 | boost::unordered_map< std::string, word_t > m_sig_index; 35 | std::vector< count_t > m_word; 36 | std::vector< count_t > m_sig; 37 | std::vector< count_t > m_known_state; 38 | std::vector< count_t > m_unknown_state; 39 | count_t m_known; 40 | count_t m_unknown; 41 | std::vector< std::pair< state_t, count_t > > m_open_class; 42 | std::vector< std::pair< state_t, count_t > > m_any; 43 | std::vector< std::pair< state_t, count_t > > m_none; 44 | 45 | struct cmp_state_t_count_t 46 | { 47 | bool operator()(const std::pair< state_t, count_t > & lhs, const std::pair< state_t, count_t > & rhs) const 48 | { return lhs.first < rhs.first; } 49 | }; 50 | 51 | // outer join of major + minor, with preference for major at intersecting states 52 | void merge_states(const std::vector< std::pair< state_t, count_t > > & major, 53 | const std::vector< std::pair< state_t, count_t > > & minor, 54 | std::vector< std::pair< state_t, count_t > > & out) 55 | { 56 | std::vector< std::pair< state_t, count_t > >::const_iterator it_maj = major.begin(), end_maj = major.end(), 57 | it_min = minor.begin(), end_min = minor.end(); 58 | while (it_maj != end_maj && it_min != end_min) 59 | { 60 | if 
(it_maj->first < it_min->first) 61 | out.push_back(*it_maj++); 62 | else if (it_min->first < it_maj->first) 63 | out.push_back(*it_min++); 64 | else 65 | out.push_back(*it_maj++), ++it_min; 66 | } 67 | while (it_maj != end_maj) 68 | out.push_back(*it_maj++); 69 | while (it_min != end_min) 70 | out.push_back(*it_min++); 71 | } 72 | 73 | // right join major + minor, with preference for major at intersecting states 74 | void right_intersect_states(const std::vector< std::pair< state_t, count_t > > & major, 75 | const std::vector< std::pair< state_t, count_t > > & minor, 76 | std::vector< std::pair< state_t, count_t > > & out) 77 | { 78 | std::vector< std::pair< state_t, count_t > >::const_iterator it_maj = major.begin(), end_maj = major.end(), 79 | it_min = minor.begin(), end_min = minor.end(); 80 | while (it_maj != end_maj && it_min != end_min) 81 | { 82 | if (it_maj->first < it_min->first) 83 | ++it_maj; 84 | else if (it_min->first < it_maj->first) 85 | out.push_back(*it_min++); 86 | else 87 | out.push_back(*it_maj++), ++it_min; 88 | } 89 | while (it_min != end_min) 90 | out.push_back(*it_min++); 91 | } 92 | 93 | // get a very coarse signature of a word, consisting of whether 94 | // - the word is all caps 95 | // - the word is capitalized as the first word of a sentence 96 | // - the word is capitalized as not the first word 97 | // - the word is lowercase 98 | // - the word has a dash 99 | // - the word contains digits 100 | // - the word is a number 101 | // - the final letter of the word 102 | std::string get_sig(const std::string & word, int pos) 103 | { 104 | UnicodeString us(word.c_str()); 105 | bool digit = false; 106 | bool nondigit = false; 107 | bool lower = false; 108 | 109 | std::ostringstream oss; 110 | oss << "UNK"; 111 | int len = us.length(); 112 | 113 | for (int i = 0; i != len; ++i) 114 | { 115 | if (u_isdigit(us[i])) 116 | digit = true; 117 | else 118 | { 119 | nondigit = true; 120 | if (u_isalpha(us[i]) && (u_islower(us[i]) || u_istitle(us[i]))) 
121 | lower = true; 122 | } 123 | } 124 | if (len > 0 && (u_isupper(us[0]) || u_istitle(us[0]))) 125 | { 126 | if (!lower) 127 | oss << "-ALLC"; 128 | else if (pos == 0) 129 | oss << "-INIT"; 130 | else 131 | oss << "-UC"; 132 | } 133 | else if (lower) 134 | oss << "-LC"; 135 | if (us.indexOf('-') > -1) 136 | oss << "-DASH"; 137 | if (digit) 138 | { 139 | if (nondigit) 140 | oss << "-DIG"; 141 | else 142 | oss << "-NUM"; 143 | } 144 | else if (len > 3) 145 | oss << UnicodeString(u_tolower(us[len - 1])); 146 | 147 | return oss.str(); 148 | } 149 | 150 | // get the conditional probability for a word by looking it up in our lexicon 151 | // if smooth is true, add in some other potential states with an unknown probability 152 | template 153 | void word_score(word_t word, OutputIterator out, bool smooth) 154 | { 155 | std::vector< std::pair< state_t, count_t > > states; 156 | merge_states(m_word_state[word], smooth ? m_any : m_none, states); 157 | 158 | for (std::vector< std::pair< state_t, count_t > >::const_iterator it = states.begin(); it != states.end(); ++it) 159 | { 160 | float cw = m_word[word]; 161 | float ct = m_known_state[it->first]; 162 | float pbtw = smooth ? 
(it->second + consts::word_smooth_factor * (m_unknown_state[it->first] / m_unknown) ) / (cw + consts::word_smooth_factor) : (it->second / cw); 163 | float pbwt = std::log(pbtw * cw / ct); 164 | if (pbwt > -100.0f) 165 | *out++ = std::make_pair(it->first, pbwt); 166 | } 167 | } 168 | 169 | // get the conditional probability for a word by building a signature out of it 170 | template 171 | void sig_score(const std::string & word, OutputIterator out, int pos) 172 | { 173 | std::vector< std::pair< state_t, count_t > > states; 174 | boost::unordered_map< std::string, word_t >::iterator it_s = m_sig_index.find(get_sig(word, pos)); 175 | if (it_s != m_sig_index.end()) 176 | right_intersect_states(m_sig_state[it_s->second], m_open_class, states); 177 | else 178 | states = m_open_class; 179 | for (std::vector< std::pair< state_t, count_t > >::const_iterator it = states.begin(); it != states.end(); ++it) 180 | { 181 | float cs = it_s == m_sig_index.end() ? 0.0 : m_sig[it_s->second]; 182 | float ct = m_known_state[it->first]; 183 | float pbts = (it->second + consts::sig_smooth_factor * (m_unknown_state[it->first] / m_unknown) ) / (cs + consts::sig_smooth_factor); 184 | float pbwt = std::log(pbts / ct); 185 | if (pbwt > -100.0f) 186 | *out++ = std::make_pair(it->first, pbwt); 187 | } 188 | } 189 | 190 | public: 191 | 192 | lexicon(const state_list & states) : m_states(states), m_known(0), m_unknown(0) {} 193 | 194 | lexicon(const state_list & states, 195 | const std::string & word_path, 196 | const std::string & sig_path, 197 | const std::string & word_state_path, 198 | const std::string & sig_state_path) 199 | : m_states(states), m_known(0), m_unknown(0) 200 | { 201 | std::ifstream word_in(word_path.c_str()); 202 | std::ifstream sig_in(sig_path.c_str()); 203 | std::ifstream word_state_in(word_state_path.c_str()); 204 | std::ifstream sig_state_in(sig_state_path.c_str()); 205 | load(word_in, sig_in, word_state_in, sig_state_in); 206 | } 207 | 208 | void load(std::istream & 
word_in, 209 | std::istream & sig_in, 210 | std::istream & word_state_in, 211 | std::istream & sig_state_in) 212 | { 213 | // get open class states 214 | // these are the set of states that have proven their arbitraryness enough 215 | // in training that we should consider them as candidates for signature 216 | // scoring 217 | m_open_class.clear(); 218 | for (state_t i = 0; i != m_states.size(); ++i) 219 | { 220 | if (m_states[i].open_class) 221 | m_open_class.push_back(std::make_pair(i, 0)); 222 | } 223 | 224 | // get words 225 | word_t max_word = 0; 226 | { 227 | std::string word; 228 | word_t index; 229 | m_word_index.clear(); 230 | while (word_in >> index) 231 | { 232 | word_in.get(); // skip sp 233 | std::getline(word_in, word); 234 | m_word_index[word] = index; 235 | if (index > max_word) 236 | max_word = index; 237 | } 238 | } 239 | // get sigs 240 | word_t max_sig = 0; 241 | { 242 | std::string sig; 243 | word_t index; 244 | m_sig_index.clear(); 245 | while (sig_in >> index) 246 | { 247 | sig_in.get(); // skip sp 248 | std::getline(sig_in, sig); 249 | m_sig_index[sig] = index; 250 | if (index > max_sig) 251 | max_sig = index; 252 | } 253 | } 254 | // get state|word 255 | { 256 | word_t word; 257 | std::pair< state_t, count_t > state_count; 258 | m_word.clear(); m_word.resize(max_word + 1); 259 | m_word_state.clear(); m_word_state.resize(max_word + 1); 260 | m_known_state.clear(); m_known_state.resize(m_states.size()); 261 | while (word_state_in >> word >> state_count.first >> state_count.second) 262 | { 263 | m_word[word] += state_count.second; 264 | m_word_state[word].push_back(state_count); 265 | m_known_state[state_count.first] += state_count.second; 266 | m_known += state_count.second; 267 | } 268 | } 269 | // get state|sig 270 | { 271 | word_t sig; 272 | std::pair< state_t, count_t > state_count; 273 | m_sig.clear(); m_sig.resize(max_sig + 1); 274 | m_sig_state.clear(); m_sig_state.resize(max_sig + 1); 275 | m_unknown_state.clear(); 
m_unknown_state.resize(m_states.size()); 276 | while (sig_state_in >> sig >> state_count.first >> state_count.second) 277 | { 278 | m_sig[sig] += state_count.second; 279 | m_sig_state[sig].push_back(state_count); 280 | m_unknown_state[state_count.first] += state_count.second; 281 | m_unknown += state_count.second; 282 | } 283 | } 284 | // fill m_any 285 | m_any.clear(); 286 | for (state_t i = 0; i != m_states.size(); ++i) 287 | { 288 | if (m_known_state[i] > 0) 289 | m_any.push_back(std::make_pair(i, 0)); 290 | } 291 | // sort our state counts by state 292 | std::sort(m_open_class.begin(), m_open_class.end(), cmp_state_t_count_t()); 293 | for (std::vector< std::vector< std::pair< state_t, count_t > > >::iterator it = m_word_state.begin(); it != m_word_state.end(); ++it) 294 | std::sort(it->begin(), it->end(), cmp_state_t_count_t()); 295 | for (std::vector< std::vector< std::pair< state_t, count_t > > >::iterator it = m_sig_state.begin(); it != m_sig_state.end(); ++it) 296 | std::sort(it->begin(), it->end(), cmp_state_t_count_t()); 297 | } 298 | 299 | template 300 | void score(const std::string & word, OutputIterator out, int pos = -1) 301 | { 302 | boost::unordered_map< std::string, word_t >::iterator it_w = m_word_index.find(word); 303 | if (it_w != m_word_index.end() && m_word[it_w->second] > 0) // have we seen this word? 
304 | word_score(it_w->second, out, m_word[it_w->second] <= consts::smooth_threshold); 305 | else 306 | sig_score(word, out, pos); 307 | } 308 | }; 309 | 310 | }}} // com::wavii::pfp 311 | 312 | #endif // __LEXICON_HPP__ 313 | -------------------------------------------------------------------------------- /include/pfp/pcfg_parser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __PCFG_PARSER_HPP__ 2 | #define __PCFG_PARSER_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace com { namespace wavii { namespace pfp { 15 | 16 | // exhaustive parser for a probabilistic context-free grammar 17 | // exploits dynamic programming to iteratively score every 18 | // possible state for every possible span of tags, in a bottom-up 19 | // approach. optimized to visit the fewest combinations possible 20 | // of a potentially n^3 * s^2 space: begin, end, split, left-child, right-child 21 | class pcfg_parser 22 | { 23 | private: 24 | 25 | const state_list & m_states; // list of states and their properties 26 | const unary_grammar & m_ug; // our unary grammar rules 27 | const binary_grammar & m_bg; // our binary grammar rules 28 | 29 | void best_parse(node & tree, const std::vector< std::vector< state_score_t > > & sentence, workspace & ws, pos_t begin, pos_t end) 30 | { 31 | tree.score = ws.get(begin, end, tree.state); 32 | 33 | if (end - begin == 1) 34 | { 35 | // are we perhaps at a terminal state? 
36 | std::vector< state_score_t >::const_iterator it_ts = std::lower_bound(sentence[begin].begin(), sentence[begin].end(), state_score_t(tree.state, 0)); 37 | if (it_ts != sentence[begin].end() && it_ts->state == tree.state) 38 | return; 39 | } 40 | 41 | // first check binary rules 42 | binary_grammar::const_iterator it_br, beg_br, end_br; 43 | score_t split; 44 | if (begin == 0 && end == sentence.size()) // ah, the boundary symbol rules 45 | beg_br = m_bg.boundary_begin(), end_br = m_bg.boundary_end(), split = end - 1; 46 | else 47 | beg_br = m_bg.get_rules_parent(tree.state).begin(), end_br = m_bg.get_rules_parent(tree.state).end(), split = begin + 1; 48 | for (; split != end; ++split) 49 | { 50 | for (it_br = beg_br; it_br != end_br; ++it_br) 51 | { 52 | if (std::abs(it_br->result.score + ws.get(begin, split, it_br->left) + ws.get(split, end, it_br->rite) - tree.score) <= consts::epsilon) 53 | { 54 | tree.children.push_back(boost::shared_ptr(new node(it_br->left, 0))); 55 | tree.children.push_back(boost::shared_ptr(new node(it_br->rite, 0))); 56 | best_parse(*tree.children[0], sentence, ws, begin, split); 57 | best_parse(*tree.children[1], sentence, ws, split, end); 58 | return; 59 | } 60 | } 61 | } 62 | // now check unary rules, with non-closed grammar 63 | unary_grammar::const_iterator it_ur = m_ug.get_rules_parent(tree.state).begin(), end_ur = m_ug.get_rules_parent(tree.state).end(); 64 | for (; it_ur != end_ur; ++it_ur) 65 | { 66 | if (std::abs(it_ur->result.score + ws.get(begin, end, it_ur->child) - tree.score) <= consts::epsilon) 67 | { 68 | tree.children.push_back(boost::shared_ptr(new node(it_ur->child, 0))); 69 | best_parse(*tree.children[0], sentence, ws, begin, end); 70 | return; 71 | } 72 | } 73 | // kill screen! 74 | throw std::runtime_error("game over, man!"); 75 | } 76 | 77 | void debinarize(node & tree) 78 | { 79 | // our grammar produces only binary and unary relations, and represents 80 | // n-ary relationships using synthetic states. 
this method removes internal synthetic states 81 | // to convert 1-2-ary tree to n-ary 82 | std::vector< boost::shared_ptr< node > > children; 83 | for (std::vector< boost::shared_ptr< node > >::iterator it = tree.children.begin(); it != tree.children.end(); ++it) 84 | { 85 | debinarize(**it); 86 | if (m_states[(*it)->state].synthetic) 87 | children.insert(children.end(), (*it)->children.begin(), (*it)->children.end()); 88 | else 89 | children.push_back( *it ); 90 | } 91 | tree.children = children; 92 | } 93 | 94 | public: 95 | 96 | pcfg_parser(const state_list & states, const unary_grammar & ug, const binary_grammar & bg) 97 | : m_states(states), m_ug(ug), m_bg(bg) 98 | { 99 | } 100 | 101 | // return true if a parse was found, and populate result tree 102 | // sentence word clouds must be sorted by state 103 | // workspace must be of adequate size for sentence length 104 | bool parse( const std::vector< std::vector< state_score_t > > & sentence, 105 | workspace & ws, 106 | node & tree ) 107 | { 108 | pos_t sentence_size = static_cast(sentence.size()); 109 | if ( sentence.size() > ws.words ) 110 | { 111 | std::ostringstream oss; 112 | oss << "sentence too large for provided workspace (" << sentence.size() << ">" << static_cast(ws.words) << ")"; 113 | throw std::runtime_error(oss.str()); 114 | } 115 | 116 | // initialize our workspace 117 | ws.clear(sentence_size); 118 | for (size_t i = 0; i != sentence.size(); ++i) 119 | ws.put(i, i + 1, sentence[i].begin(), sentence[i].end()); // provide the initial state from the sentence 120 | 121 | // hokay! 
look inside ever-widening ranges for subranges that match unary/binary rules 122 | pos_t rsize, rbegin, rend, rsplit, rsplit_end; 123 | binary_grammar::const_iterator it_r, end_r; 124 | unary_grammar::const_iterator it_ur, end_ur; 125 | size_t i_ne, sz_ne; 126 | state_t left; 127 | int val, result; 128 | for (rsize = 1; rsize != sentence_size; ++rsize) 129 | { 130 | for (rbegin = 0, rend = rbegin + rsize; rend != sentence_size; ++rend, ++rbegin) 131 | { 132 | if (rsize > 1) 133 | { 134 | // first do binary rules 135 | // check states that have narrow extents that potentially leave space for a child after 136 | for (i_ne = 0, sz_ne = ws.seen_states[rbegin].size(); i_ne != sz_ne; ++i_ne) 137 | { 138 | left = ws.seen_states[rbegin][i_ne]; 139 | // check the rite children 140 | bounds & br = ws.rite_extents[rbegin][left]; 141 | for (it_r = m_bg.get_rules(left).begin(), end_r = m_bg.get_rules(left).end(); it_r != end_r; ++it_r) 142 | { 143 | bounds & bl = ws.left_extents[rend][it_r->rite]; 144 | // do these left extents potentially leave space AND potentially reach far enough? 
145 | if (bl.narrow < br.narrow || bl.wide > br.wide) 146 | continue; 147 | // okay, search a split from the earliest one could begin to the latest 148 | rsplit = std::max(br.narrow, bl.wide); 149 | rsplit_end = std::min(br.wide, bl.narrow); 150 | result = consts::empty_score; 151 | for (; rsplit <= rsplit_end; ++rsplit) 152 | { 153 | val = ws.get(rbegin, rsplit, left) + ws.get(rsplit, rend, it_r->rite); 154 | if (val > result) 155 | result = val; 156 | } 157 | if (result != consts::empty_score) 158 | ws.put(rbegin, rend, it_r->result.state, it_r->result.score + result); 159 | } 160 | } // binary rules 161 | } 162 | // now do unary rules 163 | for ( it_ur = m_ug.closed_begin(), end_ur = m_ug.closed_end(); it_ur != end_ur; ++it_ur) 164 | { 165 | result = ws.get(rbegin, rend, it_ur->child); 166 | if (result != consts::empty_score) 167 | ws.put(rbegin, rend, it_ur->result.state, result + it_ur->result.score); 168 | } // unary rules 169 | } // rbegin 170 | } // rsize 171 | 172 | // run the boundary-symbol rules 173 | rbegin = 0; 174 | rend = rsize; 175 | rsplit = rsize - 1; 176 | score_t left_score, boundary_score = ws.get(rsplit, rend, consts::boundary_state); 177 | for (it_r = m_bg.boundary_begin(), end_r = m_bg.boundary_end(); it_r != end_r; ++it_r) 178 | { 179 | left_score = ws.get(rbegin, rsplit, it_r->left); 180 | if (left_score != consts::empty_score) 181 | ws.put(rbegin, rend, it_r->result.state, left_score + boundary_score + it_r->result.score); 182 | } 183 | 184 | if (ws.get(rbegin, rend, consts::goal_state) != consts::empty_score) 185 | { 186 | tree.state = consts::goal_state; 187 | best_parse(tree, sentence, ws, rbegin, rend); 188 | debinarize(tree); 189 | return true; 190 | } 191 | 192 | return false; 193 | } 194 | 195 | // return true if a parse was found, and populate result 196 | bool parse( const std::vector< std::vector< state_score_t > > & sentence, 197 | node & tree ) 198 | { 199 | workspace ws(sentence.size(), m_states.size() ); 200 | return 
parse(sentence, ws, tree); 201 | } 202 | 203 | }; 204 | 205 | }}} // com::wavii::pfp 206 | 207 | #endif // __PCFG_PARSER_HPP__ 208 | -------------------------------------------------------------------------------- /include/pfp/state_list.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __STATE_LIST_HPP__ 2 | #define __STATE_LIST_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace com { namespace wavii { namespace pfp { 12 | 13 | class state_list 14 | { 15 | public: 16 | 17 | struct state 18 | { 19 | std::string tag; 20 | state_t index; 21 | bool synthetic; // state is for pcfg internal use 22 | bool open_class; // state is safe for lexicon sig. guessing 23 | bool operator < (const state & other) const 24 | { 25 | return index < other.index; 26 | } 27 | // removes functional modifiers of a state, markovizations, and other junk 28 | std::string basic_category() 29 | { 30 | const char delims[] = {'=', '|', '#', '^', '~', '_'}; 31 | size_t i = 0; 32 | for (; i != tag.size(); ++i) 33 | { 34 | if (std::find(delims, delims + sizeof(delims), tag[i]) != delims + sizeof(delims)) 35 | break; 36 | } 37 | return tag.substr(0, i); 38 | } 39 | }; 40 | 41 | typedef std::vector::iterator iterator; 42 | 43 | typedef std::vector::const_iterator const_iterator; 44 | 45 | private: 46 | 47 | state_t m_size; 48 | std::vector< state > m_states; 49 | 50 | public: 51 | 52 | state_list() : m_size(0) {} 53 | 54 | state_list(const std::string & path) 55 | { 56 | std::ifstream in(path.c_str()); 57 | load(in); 58 | } 59 | 60 | void load(std::istream & in) 61 | { 62 | state s; 63 | while (in >> s.index) 64 | { 65 | in.get(); // skip space 66 | std::getline(in, s.tag); 67 | s.synthetic = (s.tag[0] == '@'); 68 | s.open_class = (s.tag[0] == '+'); 69 | if (s.open_class) 70 | s.tag = s.tag.substr(1); 71 | m_states.push_back(s); 72 | } 73 | std::sort(m_states.begin(), m_states.end()); 74 | } 75 | 76 | const 
state & operator[](int index) const { return m_states[index]; } 77 | 78 | state & operator[](int index) { return m_states[index]; } 79 | 80 | state_t size() const { return m_states.size(); } 81 | 82 | iterator begin() { return m_states.begin(); } 83 | 84 | iterator end() { return m_states.end(); } 85 | 86 | const_iterator begin() const { return m_states.begin(); } 87 | 88 | const_iterator end() const { return m_states.end(); } 89 | 90 | }; 91 | 92 | }}} // com::wavii::pfp 93 | 94 | #endif // __STATE_LIST_HPP__ 95 | -------------------------------------------------------------------------------- /include/pfp/tokenizer.h: -------------------------------------------------------------------------------- 1 | #ifndef __TOKENIZER_H__ 2 | #define __TOKENIZER_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace com { namespace wavii { namespace pfp { 13 | 14 | // lexer/tokenizer that fits fairly close to Penn Treebank conventions. 15 | // this wraps the lexer rules in etc/tokenizer.flex 16 | class tokenizer 17 | { 18 | private: 19 | 20 | struct replacement 21 | { 22 | boost::regex find; 23 | std::string format; 24 | boost::optional< boost::regex > unless; 25 | replacement(std::string find_, std::string format_) : find(find_), format(format_) {} 26 | replacement(std::string find_, std::string format_, std::string unless_) : find(find_), format(format_), unless(boost::regex(unless_)) {} 27 | }; 28 | 29 | boost::unordered_map m_literals; 30 | std::vector< replacement > m_replacements; 31 | 32 | // convert colour to color 33 | // british, canadian - doesn't matter. deep down we're all american. 
34 | std::string americanize(const std::string & in) const 35 | { 36 | boost::unordered_map::const_iterator it_h = m_literals.find(in); 37 | if (it_h != m_literals.end()) 38 | return it_h->second; 39 | boost::smatch mr; 40 | for (std::vector::const_iterator it = m_replacements.begin(); it != m_replacements.end(); ++it) 41 | { 42 | if ( !(it->unless && boost::regex_match(in, *it->unless)) 43 | && boost::regex_match(in, mr, it->find)) 44 | return mr.format(it->format); 45 | } 46 | return in; 47 | } 48 | 49 | // convert certain windows codepage peculiarities that we shouldn't 50 | // have to see, but may crop up anyway 51 | std::string cp1252_normalize(const std::string & in) const 52 | { 53 | std::string ret = in; 54 | boost::algorithm::replace_all(ret, "'", "'"); 55 | boost::algorithm::replace_all(ret, "\xc2\x91", "`"); 56 | boost::algorithm::replace_all(ret, "\xe2\x80\x98", "`"); 57 | boost::algorithm::replace_all(ret, "\xc2\x92", "'"); 58 | boost::algorithm::replace_all(ret, "\xe2\x80\x99", "'"); 59 | boost::algorithm::replace_all(ret, "\xc2\x93", "``"); 60 | boost::algorithm::replace_all(ret, "\xe2\x80\x9c", "``"); 61 | boost::algorithm::replace_all(ret, "\xc2\x94", "''"); 62 | boost::algorithm::replace_all(ret, "\xe2\x80\x9d", "''"); 63 | boost::algorithm::replace_all(ret, "\xc2\xbc", "1\\/4"); 64 | boost::algorithm::replace_all(ret, "\xc2\xbd", "1\\/2"); 65 | boost::algorithm::replace_all(ret, "\xc2\xbe", "3\\/4"); 66 | boost::algorithm::replace_all(ret, "\xc2\xa2", "cents"); 67 | boost::algorithm::replace_all(ret, "\xc2\xa3", "#"); 68 | boost::algorithm::replace_all(ret, "\xc2\x80", "$"); 69 | boost::algorithm::replace_all(ret, "\xe2\x82\xac", "$"); // Euro -- no good translation! 
70 | return ret; 71 | } 72 | 73 | // escape x -> \x 74 | std::string escape(const std::string & in, char echar) const 75 | { 76 | std::string ret = in; 77 | std::string c(1, echar), ec = "\\" + c, eec = "\\\\" + c; 78 | // change c -> \c 79 | boost::algorithm::replace_all(ret, c, ec); 80 | // change \\c -> \c 81 | boost::algorithm::replace_all(ret, eec, ec); 82 | return ret; 83 | } 84 | 85 | std::string ampersandize(const std::string & in) const 86 | { 87 | return boost::algorithm::ireplace_all_copy(in, "&", "&"); 88 | } 89 | 90 | void init() 91 | { 92 | m_replacements.push_back(replacement("(.*)haem(at)?o(.*)", "$1hem$2o$3")); 93 | m_replacements.push_back(replacement("(.*)aemia$", "$1emia")); 94 | m_replacements.push_back(replacement("(.*)([lL]euk)aem(.*)", "$1$2em$3")); 95 | m_replacements.push_back(replacement("(.*)programme(s?)$", "$1program$2")); 96 | m_replacements.push_back(replacement("^([a-z]{3,})our(s?)$", "$1or$2", "glamour|de[tv]our")); 97 | } 98 | 99 | public: 100 | 101 | class tokenizer_out 102 | { 103 | private: 104 | std::vector & m_out; 105 | const com::wavii::pfp::tokenizer & m_t; 106 | public: 107 | tokenizer_out(std::vector & out, const com::wavii::pfp::tokenizer & t) : m_out(out), m_t(t) {} 108 | void put(const char * p) { m_out.push_back(p); } 109 | void put_american(const char * p) { m_out.push_back(m_t.americanize(p)); } 110 | void put_cp1252(const char * p) { m_out.push_back(m_t.cp1252_normalize(p)); } 111 | void put_escape(const char * p, char echar) { m_out.push_back(m_t.escape(p, echar)); } 112 | void put_amp(const char * p) { m_out.push_back(m_t.ampersandize(p)); } 113 | void err(const char * p) { /* do zilch */ } 114 | }; 115 | 116 | void tokenize(const std::string & in, std::vector & out) const; 117 | 118 | tokenizer() 119 | { 120 | init(); 121 | } 122 | 123 | tokenizer(const std::string & americanize_path) 124 | { 125 | init(); 126 | std::ifstream in(americanize_path.c_str()); 127 | load(in); 128 | } 129 | 130 | void 
load(std::istream & in) 131 | { 132 | std::pair< std::string, std::string > kv; 133 | m_literals.clear(); 134 | while (in >> kv.first >> kv.second) 135 | m_literals.insert(kv); 136 | } 137 | }; 138 | 139 | 140 | }}} // com::wavii::pfp 141 | 142 | #endif // __TOKENIZER_H__ 143 | -------------------------------------------------------------------------------- /include/pfp/unary_grammar.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __UNARY_GRAMMAR_HPP__ 2 | #define __UNARY_GRAMMAR_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace com { namespace wavii { namespace pfp { 13 | 14 | // unary rules have a child, parent, weight 15 | // such as: NNP => NP, 100 16 | // weights represent the preference for their respective rule 17 | // we also try to produce the closure of the grammar by creating transitive associations: 18 | // A->B, 100 and B->C, 100 : A->C 200 19 | // but we only go one level deep - probably not a full closure 20 | class unary_grammar 21 | { 22 | public: 23 | 24 | struct relationship 25 | { 26 | state_t child; 27 | state_score_t result; 28 | relationship() {} 29 | relationship(state_t child_, state_score_t result_) : child(child_), result(result_) {} 30 | bool operator < (const relationship & rhs) const 31 | { 32 | return child < rhs.child || (child == rhs.child && result.state < rhs.result.state); 33 | } 34 | }; 35 | 36 | typedef std::vector< relationship >::const_iterator const_iterator; 37 | 38 | private: 39 | 40 | const state_list & m_states; 41 | std::vector< std::vector< relationship > > m_rules_parent; // rules indexed by parent 42 | std::vector< relationship > m_rules_closed; // rules' closure for transitive relationships 43 | 44 | public: 45 | 46 | unary_grammar(const state_list & states) : m_states(states) {} 47 | 48 | unary_grammar(const state_list & states, const std::string & path) 49 | : m_states(states) 50 | { 51 | std::ifstream 
in(path.c_str()); 52 | load(in); 53 | } 54 | 55 | void load(std::istream & in) 56 | { 57 | m_rules_parent.resize(m_states.size()); 58 | m_rules_closed.clear(); 59 | // stream is => [ child parent score ] 60 | relationship rel; 61 | // keep track of closed rels to avoid dupes 62 | std::vector< std::vector< relationship > > children_closed(m_states.size()); 63 | std::vector< std::vector< relationship > > parents_closed(m_states.size()); 64 | boost::unordered_set< std::pair< state_t, state_t > > closed_rels; 65 | // initialize unity rules in closure 66 | for (state_t i = 0; i != m_states.size(); ++i) 67 | { 68 | parents_closed[i].push_back(relationship(i, state_score_t(i, 0.0f))); 69 | children_closed[i].push_back(relationship(i, state_score_t(i, 0.0f))); 70 | closed_rels.insert(std::make_pair(i, i)); 71 | } 72 | float f; 73 | size_t parents_closed_total = 0; 74 | while (in >> rel.child >> rel.result.state >> f) 75 | { 76 | rel.result.score = static_cast(f * consts::score_resolution); 77 | parents_closed[rel.child].push_back(rel); ++parents_closed_total; 78 | children_closed[rel.result.state].push_back(rel); 79 | m_rules_parent[rel.result.state].push_back(rel); 80 | closed_rels.insert(std::make_pair(rel.child, rel.result.state)); 81 | // add transitive rule: [all parents of rel.parent] -> [all children of rel.child] 82 | for (state_t i = 0, sz_i = parents_closed[rel.result.state].size(); i != sz_i; ++i) 83 | { 84 | relationship & reli = parents_closed[rel.result.state][i]; 85 | for (state_t j = 0, sz_j = children_closed[rel.child].size(); j != sz_j; ++j) 86 | { 87 | relationship & relj = children_closed[rel.child][j]; 88 | if (closed_rels.find(std::make_pair(relj.child, reli.result.state)) == closed_rels.end()) 89 | { 90 | closed_rels.insert(std::make_pair(relj.child, reli.result.state)); 91 | relationship trans_rel(relj.child, state_score_t(reli.result.state, reli.result.score + rel.result.score + relj.result.score)); 92 | 
parents_closed[relj.child].push_back(trans_rel); ++parents_closed_total; 93 | children_closed[reli.result.state].push_back(trans_rel); 94 | } 95 | } 96 | } 97 | } 98 | m_rules_closed.reserve(parents_closed_total); 99 | // populate closed rules (all except for original identity rule) 100 | for (state_t i = 0; i != m_states.size(); ++i) 101 | { 102 | std::sort(parents_closed[i].begin() + 1, parents_closed[i].end()); 103 | std::copy(parents_closed[i].begin() + 1, parents_closed[i].end(), std::back_inserter(m_rules_closed)); 104 | } 105 | } 106 | 107 | const std::vector< relationship > & get_rules_parent(state_t parent) const 108 | { 109 | return m_rules_parent[parent]; 110 | } 111 | 112 | const_iterator closed_begin() const 113 | { 114 | return m_rules_closed.begin(); 115 | } 116 | 117 | const_iterator closed_end() const 118 | { 119 | return m_rules_closed.end(); 120 | } 121 | }; 122 | 123 | }}} // com::wavii::pfp 124 | 125 | #endif // __UNARY_GRAMMAR_HPP__ 126 | -------------------------------------------------------------------------------- /include/pfp/util.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __UTIL_HPP__ 2 | #define __UTIL_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace com { namespace wavii { namespace pfp { 12 | 13 | struct state_score_t 14 | { 15 | state_t state; 16 | score_t score; 17 | 18 | state_score_t() : state(0), score(0) {} 19 | state_score_t(state_t state_, score_t score_) : state(state_), score(score_) {} 20 | 21 | bool operator < (const state_score_t & rhs) const 22 | { 23 | return state < rhs.state; 24 | } 25 | }; 26 | 27 | // a node of an n-ary parse tree 28 | struct node 29 | { 30 | state_t state; 31 | score_t score; 32 | std::vector< boost::shared_ptr< node > > children; 33 | node() : state(0), score(0) {} 34 | node(state_t state_, score_t score_) : state(state_), score(score_) {} 35 | }; 36 | 37 | // bounds represent the widest and 
the narrowest that we've ever seen 38 | // a state extent right from a start position, or extend left from an 39 | // end position 40 | struct bounds 41 | { 42 | bounds() : narrow(0), wide(0) {} 43 | bounds(pos_t narrow_, pos_t wide_) : narrow(narrow_), wide(wide_) {} 44 | pos_t narrow; 45 | pos_t wide; 46 | }; 47 | 48 | struct workspace 49 | { 50 | pos_t words; // the longest sentence this workspace will support 51 | state_t states; // the number of states this workspace will support 52 | 53 | std::vector< std::vector< bounds > > left_extents; // end, state => bounds 54 | std::vector< std::vector< bounds > > rite_extents; // begin, state => bounds 55 | std::vector< std::vector< state_t > > seen_states; // begin => state (sparse) 56 | std::vector< std::vector< std::vector < score_t > > > state_scores; // begin, end => states 57 | 58 | workspace(pos_t words_, state_t states_) 59 | : words(words_), states(states_), 60 | left_extents(words + 1), rite_extents(words), 61 | seen_states(words), state_scores(words) 62 | { 63 | for (pos_t i = 0; i != words + 1; ++i) 64 | left_extents[i].resize(states); 65 | for (pos_t i = 0; i != words; ++i) 66 | rite_extents[i].resize(states); 67 | for (pos_t i = 0; i != words; ++i) 68 | seen_states[i].reserve(1024); 69 | for (pos_t i = 0; i != words; ++i) 70 | state_scores[i].resize(words + 1); 71 | // note the upper triangularness: end > start 72 | // and the range to end is inclusive, so length + 1 73 | for (pos_t i = 0; i != words; ++i) 74 | { 75 | for (pos_t j = i + 1; j != words + 1; ++j) 76 | state_scores[i][j].resize(states); 77 | } 78 | } 79 | 80 | void clear() 81 | { 82 | clear(words); 83 | } 84 | 85 | // any workspace size >= our sentence size will do, 86 | // but we only need to clear up to as many words as we need 87 | void clear(pos_t sentence_size) 88 | { 89 | for (pos_t i = 0; i != sentence_size + 1; ++i) 90 | std::fill(left_extents[i].begin(), left_extents[i].end(), bounds(std::numeric_limits::min(), 
std::numeric_limits::max())); 91 | for (pos_t i = 0; i != sentence_size; ++i) 92 | std::fill(rite_extents[i].begin(), rite_extents[i].end(), bounds(std::numeric_limits::max(), std::numeric_limits::min())); 93 | for (pos_t i = 0; i != sentence_size; ++i) 94 | seen_states[i].clear(); 95 | for (pos_t i = 0; i != sentence_size; ++i) 96 | { 97 | for (pos_t j = i + 1; j != sentence_size + 1; ++j) 98 | std::fill(state_scores[i][j].begin(), state_scores[i][j].end(), consts::empty_score); 99 | } 100 | } 101 | 102 | void put(pos_t begin, pos_t end, state_t state, score_t score) 103 | { 104 | score_t & f = state_scores[begin][end][state]; 105 | if (f == consts::empty_score) 106 | { 107 | f = score; 108 | // this is the first time we've seen this state occupy [begin, end) 109 | // let's update our bounds information to fit 110 | bounds & bl = left_extents[end][state]; 111 | bounds & br = rite_extents[begin][state]; 112 | // sneaky! begin can never be > bl.narrow because diff is always increasing 113 | // UNLESS we are in initial state. 
mirror applies for extents below 114 | if (begin > bl.narrow) 115 | bl.narrow = bl.wide = begin; 116 | else if (begin < bl.wide) 117 | bl.wide = begin; 118 | if (end < br.narrow) 119 | { 120 | br.narrow = br.wide = end; 121 | seen_states[begin].push_back(state); 122 | } 123 | else if (end > br.wide) 124 | br.wide = end; 125 | } 126 | else if (f < score) 127 | f = score; 128 | } 129 | 130 | template 131 | void put(pos_t begin, pos_t end, InputIterator ss_begin, InputIterator ss_end) 132 | { 133 | for (; ss_begin != ss_end; ++ss_begin) 134 | put(begin, end, ss_begin->state, ss_begin->score); 135 | } 136 | 137 | score_t get(pos_t begin, pos_t end, state_t state) 138 | { 139 | return state_scores[begin][end][state]; 140 | } 141 | }; 142 | 143 | // stich a node tree to a an output 144 | template 145 | InputIterator stitch(Out & out, const node & tree, InputIterator word_it, StateList & states) 146 | { 147 | // don't output boundary 148 | if (tree.state == consts::boundary_state) 149 | return word_it; 150 | 151 | out << '('; 152 | if (states[tree.state].basic_category().empty() && tree.children.empty()) 153 | out << *word_it; 154 | else 155 | out << states[tree.state].basic_category(); 156 | out << ' '; 157 | 158 | if ( tree.children.empty() ) { 159 | out << *word_it++; 160 | } 161 | else 162 | { 163 | for (std::vector< boost::shared_ptr< node > >::const_iterator it = tree.children.begin(); it != tree.children.end(); ++it) 164 | { 165 | word_it = stitch(out, **it, word_it, states); 166 | if (it != tree.children.end() - 1) 167 | out << ' '; // give some space to lists 168 | } 169 | } 170 | out << ')'; 171 | return word_it; 172 | } 173 | 174 | }}} // com::wavii::pfp 175 | 176 | #endif // __UTIL_HPP__ 177 | -------------------------------------------------------------------------------- /include/pfpd/pfpd_handler.h: -------------------------------------------------------------------------------- 1 | #ifndef __PFPD_HANDLER_H__ 2 | #define __PFPD_HANDLER_H__ 3 | 4 | #include 
5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include "resource_stack.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace com { namespace wavii { namespace pfp { 21 | 22 | class pfpd_handler : public moost::http::request_handler_base 23 | { 24 | private: 25 | 26 | tokenizer tokenizer_; 27 | state_list states_; 28 | lexicon lexicon_; 29 | unary_grammar ug_; 30 | binary_grammar bg_; 31 | pcfg_parser pcfg_; 32 | size_t timer_bucket_size_; 33 | resource_stack< workspace > workspaces_; 34 | 35 | // perform utf-8 encoded URL-decoding on a string. returns false if the encoding was invalid 36 | static bool url_decode(const std::string& in, std::string& out); 37 | 38 | // provide a version string 39 | std::string version(); 40 | 41 | // provide a little get-console for interactive parsing 42 | std::string console(const std::string & query); 43 | 44 | // tokenize, lexicon-weight, and parse a sentence 45 | std::string parse(const std::string & sentence); 46 | 47 | public: 48 | 49 | pfpd_handler(); 50 | 51 | void init(size_t sentence_length, size_t threads, const std::string & data_dir); 52 | 53 | void handle_request(const moost::http::request& req, moost::http::reply& rep); 54 | 55 | }; 56 | 57 | }}} // com::wavi::pfp 58 | 59 | #endif // __PFPD_HANDLER_H__ 60 | -------------------------------------------------------------------------------- /include/pfpd/resource_stack.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __RESOURCE_STACK_HPP__ 2 | #define __RESOURCE_STACK_HPP__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace com { namespace wavii { namespace pfp { 9 | 10 | // thread-safe way of grabbing a resource from a pool 11 | template 12 | class resource_stack 13 | { 14 | private: 15 | 16 | std::stack< T * > resources_; 17 | boost::mutex mutex_; 18 | boost::condition cond_; 19 | 20 | public: 21 | 22 | ~resource_stack() 23 | { 24 | 
while (!resources_.empty()) 25 | { 26 | delete resources_.top(); 27 | resources_.pop(); 28 | } 29 | } 30 | 31 | // raii resource access 32 | class scoped_resource 33 | { 34 | private: 35 | T * pr_; 36 | resource_stack & rs_; 37 | public: 38 | scoped_resource(resource_stack & rs): rs_(rs) 39 | { 40 | boost::mutex::scoped_lock lock(rs_.mutex_); 41 | while (rs_.resources_.empty()) 42 | rs_.cond_.wait(lock); 43 | pr_ = rs_.resources_.top(); 44 | rs_.resources_.pop(); 45 | } 46 | ~scoped_resource() 47 | { 48 | boost::mutex::scoped_lock lock(rs_.mutex_); 49 | rs_.resources_.push(pr_); 50 | rs_.cond_.notify_one(); 51 | } 52 | T & operator* () { return *pr_; } 53 | T * operator->() { return pr_; } 54 | }; 55 | // add a resource to the stack 56 | // we take ownership 57 | void add_resource(T * presource) 58 | { 59 | boost::mutex::scoped_lock lock(mutex_); 60 | resources_.push(presource); 61 | cond_.notify_one(); 62 | } 63 | }; 64 | 65 | }}} // com::wavii::pfp 66 | 67 | #endif // __RESOURCE_STACK_HPP__ 68 | -------------------------------------------------------------------------------- /include/pypfp/pypfp.h: -------------------------------------------------------------------------------- 1 | #ifndef __PYPFP_H__ 2 | #define __PYPFP_H__ 3 | 4 | #include // note: must include python at the beginning or it will bitch 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace com { namespace wavii { namespace pfp { 19 | 20 | /** 21 | * pypfp wraps pfp with python glue 22 | */ 23 | class pypfp 24 | { 25 | private: 26 | 27 | tokenizer tokenizer_; 28 | state_list states_; 29 | lexicon lexicon_; 30 | unary_grammar ug_; 31 | binary_grammar bg_; 32 | pcfg_parser pcfg_; 33 | boost::shared_ptr pworkspace_; 34 | 35 | void init(size_t sentence_length = 45, const std::string & data_dir = ""); 36 | std::string _parse_tokens(const std::vector& words); 37 | 38 | public: 39 | 40 | 
# Build script for the `pfp` python extension (the pypfp wrapper plus the pfp sources).
try:
    from setuptools import setup, find_packages, Extension
except ImportError:
    # setuptools is missing: bootstrap it through ez_setup, then retry the import
    from ez_setup import use_setuptools
    use_setuptools()
    from setuptools import setup, find_packages, Extension

# make every install scheme put data_files where the pure-python packages go
from distutils.command.install import INSTALL_SCHEMES
for scheme in INSTALL_SCHEMES.values():
    scheme['data'] = scheme['purelib']

import os
import platform
# platform.system() is 'Darwin' on macOS; platform.platform() is not a reliable
# test because newer Pythons report 'macOS-...' there instead of 'Darwin-...'
if platform.system() == 'Darwin':
    if '64bit' in platform.platform():
        os.environ['ARCHFLAGS'] = "-arch x86_64"
    # multi-threaded ("-mt") boost library names
    libraries = ['boost_python-mt', 'boost_filesystem-mt', 'boost_thread-mt', 'boost_regex-mt', 'boost_system-mt', 'icuio', 'icuuc']
else:
    # icuuc is listed here as well, matching the Darwin list above and the
    # libraries the CMake build links against
    libraries = ['boost_python', 'boost_filesystem', 'boost_thread', 'boost_regex', 'boost_system', 'icuio', 'icuuc']

setup(
    name='pfp',
    version='0.0.5',
    description='pretty fast statistical parser for natural languages',
    author='Wavii',
    author_email='info@wavii.com',
    url='https://github.com/wavii/pfp',
    packages=find_packages(exclude=['ez_setup', 'tests*', 'integration*']),
    include_package_data=True,
    zip_safe=False,

    ext_modules=[
        Extension('pfp',
                  ['src/pfp/config.cpp',
                   'src/pfp/tokenizer.yy.cpp',
                   'src/pypfp/pypfp.cpp'],
                  include_dirs=['include'],
                  libraries=libraries,
                  extra_compile_args=['-g']
                  ),
    ],
    # grammar / lexicon tables the parser loads at run time
    data_files=[('share', ['share/pfp/americanizations', 'share/pfp/binary_rules', 'share/pfp/sigs', 'share/pfp/sig_state',
                           'share/pfp/states', 'share/pfp/unary_rules', 'share/pfp/words', 'share/pfp/word_state'])]
)
maximise maximize 75 | maximised maximized 76 | maximising maximizing 77 | meagre meager 78 | minimise minimize 79 | minimised minimized 80 | minimising minimizing 81 | modernise modernize 82 | modernised modernized 83 | modernising modernizing 84 | misdemeanour misdemeanor 85 | misdemeanours misdemeanors 86 | neighbour neighbor 87 | neighbours neighbors 88 | neighbourhood neighborhood 89 | neighbourhoods neighborhoods 90 | oestrogen estrogen 91 | oestrogens estrogens 92 | organisation organization 93 | organisations organizations 94 | penalise penalize 95 | penalised penalized 96 | popularise popularize 97 | popularised popularized 98 | popularises popularizes 99 | popularising popularizing 100 | practise practice 101 | practised practiced 102 | pressurise pressurize 103 | pressurised pressurized 104 | pressurises pressurizes 105 | pressurising pressurizing 106 | realise realize 107 | realised realized 108 | realising realizing 109 | realises realizes 110 | recognise recognize 111 | recognised recognized 112 | recognising recognizing 113 | recognises recognizes 114 | rumoured rumored 115 | rumouring rumoring 116 | savour savor 117 | savours savors 118 | savoured savored 119 | savouring savoring 120 | splendour splendor 121 | splendours splendors 122 | theatre theater 123 | theatres theaters 124 | titre titer 125 | titres titers 126 | travelled traveled 127 | travelling traveling 128 | january January 129 | february February 130 | april April 131 | june June 132 | july July 133 | august August 134 | september September 135 | october October 136 | november November 137 | december December 138 | monday Monday 139 | tuesday Tuesday 140 | wednesday Wednesday 141 | thursday Thursday 142 | friday Friday 143 | saturday Saturday 144 | sunday Sunday 145 | -------------------------------------------------------------------------------- /share/pfp/sig_state: -------------------------------------------------------------------------------- 1 | 115 193 1 2 | 186 145 7 3 | 123 67 
2 4 | 135 77 233 5 | 96 145 51 6 | 136 13 1 7 | 41 151 1 8 | 102 145 11 9 | 143 145 24 10 | 39 151 17 11 | 121 145 34 12 | 95 145 8 13 | 123 145 331 14 | 84 77 3 15 | 136 3890 1 16 | 19 151 1 17 | 47 145 11 18 | 46 313 5 19 | 42 145 1 20 | 155 174 2 21 | 187 77 9 22 | 183 277 1 23 | 142 136 91 24 | 103 136 5 25 | 118 174 1 26 | 85 151 10 27 | 154 174 8 28 | 39 86 1 29 | 134 390 1 30 | 146 151 11 31 | 96 106 3 32 | 125 1759 1 33 | 71 483 1 34 | 59 145 2 35 | 102 106 3 36 | 147 151 33 37 | 0 87 1 38 | 115 2940 1 39 | 115 77 3 40 | 123 137 2 41 | 143 137 1 42 | 52 483 1 43 | 170 151 1 44 | 121 66 2 45 | 96 256 195 46 | 111 193 1 47 | 153 344 1 48 | 36 151 3 49 | 84 6349 1 50 | 155 151 10 51 | 102 66 1 52 | 27 151 3 53 | 138 174 1 54 | 140 344 2 55 | 102 256 4 56 | 85 174 1 57 | 123 256 7 58 | 35 145 1 59 | 132 279 1 60 | 167 151 8 61 | 88 151 1 62 | 181 231 1 63 | 96 304 251 64 | 116 344 1 65 | 136 193 22 66 | 44 151 1 67 | 123 304 2 68 | 104 193 1 69 | 131 2940 1 70 | 102 304 14 71 | 143 304 1 72 | 22 145 1 73 | 116 193 1 74 | 125 344 1 75 | 136 344 1 76 | 83 74 1 77 | 174 151 1 78 | 98 145 4 79 | 104 344 3 80 | 142 383 2 81 | 103 383 1 82 | 193 136 1 83 | 12 304 1 84 | 123 90 1 85 | 173 151 1 86 | 161 151 6 87 | 66 151 2 88 | 172 145 1 89 | 95 2291 1 90 | 140 1695 1 91 | 159 151 5 92 | 158 151 3 93 | 140 136 1 94 | 83 151 18 95 | 191 151 27 96 | 93 5795 1 97 | 91 344 1 98 | 51 151 4 99 | 45 2967 1 100 | 46 383 1 101 | 134 2846 1 102 | 36 297 2 103 | 166 151 3 104 | 83 1737 1 105 | 14 145 1 106 | 115 50 1 107 | 26 231 1 108 | 107 174 1 109 | 150 145 5 110 | 151 145 1 111 | 29 281 1 112 | 38 77 11 113 | 168 174 2 114 | 183 136 2 115 | 104 136 1 116 | 68 169 4 117 | 48 151 8 118 | 83 86 1 119 | 75 151 9 120 | 186 200 1 121 | 48 279 2 122 | 25 145 67 123 | 90 136 2 124 | 86 151 2 125 | 1 1087 1 126 | 143 177 2 127 | 21 151 2 128 | 132 181 1 129 | 5 151 10 130 | 29 151 3 131 | 104 50 3 132 | 123 192 1 133 | 116 50 1 134 | 165 145 1 135 | 103 344 1 136 | 140 8250 1 137 | 
13 145 52 138 | 30 145 3 139 | 186 428 1 140 | 57 136 1 141 | 81 151 13 142 | 144 151 6 143 | 85 125 1 144 | 113 344 1 145 | 180 231 1 146 | 188 4685 1 147 | 96 1021 2 148 | 153 50 1 149 | 16 145 1 150 | 142 313 5 151 | 49 151 26 152 | 80 136 2 153 | 137 169 1 154 | 1 383 1 155 | 162 169 1 156 | 46 136 60 157 | 168 151 9 158 | 177 8395 1 159 | 91 390 1 160 | 194 151 29 161 | 123 155 3 162 | 94 174 2 163 | 96 336 1 164 | 188 50 1 165 | 164 169 2 166 | 39 297 2 167 | 193 118 2 168 | 155 50 1 169 | 91 856 1 170 | 38 88 7 171 | 105 174 3 172 | 128 297 6 173 | 129 118 1 174 | 6 5533 1 175 | 3 151 21 176 | 93 181 1 177 | 175 25 1 178 | 135 25 2 179 | 134 118 1 180 | 32 151 20 181 | 31 151 6 182 | 98 194 2 183 | 38 151 36 184 | 140 118 111 185 | 135 118 5 186 | 96 833 1 187 | 192 151 18 188 | 98 169 1 189 | 73 165 3 190 | 1 118 1 191 | 101 174 8 192 | 37 151 17 193 | 25 372 1 194 | 84 118 1 195 | 74 151 7 196 | 96 210 27 197 | 104 353 1 198 | 112 174 1 199 | 157 174 1 200 | 74 1737 1 201 | 71 118 2 202 | 136 353 1 203 | 129 11337 1 204 | 125 353 1 205 | 104 118 2 206 | 110 174 1 207 | 152 151 4 208 | 180 151 76 209 | 104 181 1 210 | 124 297 1 211 | 26 174 2 212 | 142 151 1 213 | 4 151 4 214 | 126 297 4 215 | 48 270 1 216 | 90 118 29 217 | 184 279 1 218 | 175 297 1 219 | 91 2639 1 220 | 1 297 2 221 | 87 151 1 222 | 182 183 1 223 | 148 183 2 224 | 193 23 1 225 | 123 109 2 226 | 128 25 1 227 | 134 74 14 228 | 116 2289 1 229 | 135 297 1 230 | 140 297 1 231 | 169 151 1 232 | 141 145 2 233 | 184 151 134 234 | 148 23 3 235 | 136 127 1 236 | 188 183 2 237 | 109 174 1 238 | 64 151 2 239 | 73 148 65 240 | 175 183 6 241 | 181 183 2 242 | 134 297 5 243 | 148 297 1 244 | 131 297 1 245 | 183 148 1 246 | 125 148 321 247 | 59 169 4 248 | 74 174 1 249 | 106 297 1 250 | 129 297 3 251 | 177 148 14 252 | 190 151 10 253 | 136 297 5 254 | 126 118 1 255 | 47 169 2 256 | 122 136 2 257 | 106 148 7 258 | 95 169 84 259 | 34 183 1 260 | 123 169 127 261 | 178 297 1 262 | 96 194 1 263 | 2 151 33 264 | 
96 170 42 265 | 121 169 66 266 | 102 169 207 267 | 143 169 1 268 | 186 169 1 269 | 96 169 107 270 | 123 194 3 271 | 108 174 3 272 | 157 151 7 273 | 104 297 1 274 | 121 194 1 275 | 161 344 1 276 | 152 174 1 277 | 110 151 1 278 | 89 151 4 279 | 79 151 4 280 | 177 183 1 281 | 91 279 1 282 | 145 183 2 283 | 103 174 37 284 | 140 2289 1 285 | 183 183 8 286 | 26 151 30 287 | 91 151 2 288 | 114 2289 1 289 | 177 23 2 290 | 71 151 25 291 | 80 174 1 292 | 116 151 1 293 | 97 145 1 294 | 58 145 1 295 | 163 151 15 296 | 34 151 20 297 | 6 151 21 298 | 35 296 1 299 | 26 297 2 300 | 91 297 2 301 | 1 2639 1 302 | 120 169 8 303 | 183 88 1 304 | 126 174 3 305 | 145 151 232 306 | 1 86 1 307 | 177 151 75 308 | 143 2946 1 309 | 125 187 4 310 | 90 856 3 311 | 136 151 1 312 | 91 212 3 313 | 171 151 1 314 | 103 118 1 315 | 28 145 2 316 | 84 86 1 317 | 178 151 88 318 | 183 151 427 319 | 117 169 1 320 | 73 151 3 321 | 78 151 11 322 | 82 86 1 323 | 84 88 6 324 | 148 151 124 325 | 0 10611 1 326 | 182 151 59 327 | 109 118 1 328 | 131 151 1 329 | 10 151 1 330 | 29 901 1 331 | 187 279 1 332 | 185 151 22 333 | 129 151 1 334 | 188 151 154 335 | 15 145 2 336 | 100 174 1 337 | 193 151 181 338 | 47 372 673 339 | 99 174 2 340 | 187 151 294 341 | 184 183 3 342 | 90 174 1 343 | 48 9798 1 344 | 1 151 159 345 | 57 174 2 346 | 177 86 1 347 | 176 151 19 348 | 153 151 7 349 | 115 174 4 350 | 84 151 15 351 | 135 279 1 352 | 181 151 150 353 | 140 279 1 354 | 43 174 1 355 | 175 151 376 356 | 187 88 142 357 | 96 232 1 358 | 114 151 2 359 | 85 9779 1 360 | 82 151 2 361 | 52 174 3 362 | 175 279 1 363 | 28 636 1 364 | 135 856 2 365 | 96 306 10 366 | 186 306 2 367 | 77 279 1 368 | 123 1702 1 369 | 140 856 27 370 | 150 169 4 371 | 33 151 8 372 | 71 174 4 373 | 80 151 34 374 | 123 139 1 375 | 116 174 11 376 | 163 174 1 377 | 123 306 115 378 | 77 151 2 379 | 7 151 3 380 | 143 306 2 381 | 150 194 1 382 | 91 25 1 383 | 55 151 2 384 | 121 306 1 385 | 136 174 21 386 | 181 281 1 387 | 180 183 1 388 | 125 174 19 389 | 25 169 2 
390 | 84 281 1 391 | 93 856 1 392 | 193 856 1 393 | 62 151 1 394 | 26 118 1 395 | 131 408 1 396 | 193 2847 1 397 | 130 297 1 398 | 79 118 1 399 | 46 151 1 400 | 175 39 1 401 | 73 174 1 402 | 68 145 5 403 | 104 174 25 404 | 143 207 1 405 | 90 279 1 406 | 111 174 7 407 | 179 151 111 408 | 34 281 1 409 | 134 174 5 410 | 135 4697 1 411 | 172 169 1 412 | 143 303 1 413 | 53 151 1 414 | 148 174 1 415 | 131 174 5 416 | 10 174 1 417 | 80 86 1 418 | 186 303 4 419 | 187 174 1 420 | 99 151 1 421 | 90 151 12 422 | 85 193 2 423 | 135 382 2 424 | 123 150 2 425 | 106 174 8 426 | 129 174 30 427 | 84 4697 1 428 | 193 174 1 429 | 93 174 1 430 | 0 911 1 431 | 121 62 1 432 | 136 281 1 433 | 43 151 13 434 | 149 169 1 435 | 175 174 2 436 | 57 151 2 437 | 1 174 2 438 | 72 145 1 439 | 153 174 12 440 | 135 174 30 441 | 140 174 22 442 | 52 151 12 443 | 37 74 1 444 | 128 279 1 445 | 114 174 1 446 | 8 194 1 447 | 54 174 1 448 | 174 145 7 449 | 183 303 5 450 | 1 306 2 451 | 145 150 1 452 | 177 150 1 453 | 193 296 1 454 | 136 150 1 455 | 175 306 8 456 | 125 150 1 457 | 37 169 3 458 | 135 1702 1 459 | 74 169 2 460 | 123 856 2 461 | 135 48 16 462 | 177 303 1 463 | 145 303 1 464 | 114 306 2 465 | 140 306 12 466 | 178 303 1 467 | 46 372 11 468 | 139 306 1 469 | 148 296 1 470 | 135 306 3 471 | 123 408 1 472 | 131 306 9 473 | 116 150 1 474 | 148 306 2 475 | 109 169 42 476 | 134 306 12 477 | 111 306 1 478 | 185 306 1 479 | 167 145 1 480 | 187 48 2 481 | 32 169 2 482 | 132 67 1 483 | 103 625 1 484 | 113 194 1 485 | 131 48 1 486 | 175 296 1 487 | 1 161 1 488 | 193 306 1 489 | 129 306 12 490 | 119 145 7 491 | 188 306 3 492 | 154 145 15 493 | 0 1671 1 494 | 187 306 1 495 | 113 169 11 496 | 132 145 21 497 | 155 145 11 498 | 103 169 160 499 | 115 372 1 500 | 61 66 1 501 | 136 306 29 502 | 125 306 8 503 | 11 145 1 504 | 0 243 1 505 | 183 306 2 506 | 1 303 2 507 | 35 151 6 508 | 104 306 4 509 | 134 62 1 510 | 176 303 1 511 | 163 296 1 512 | 136 1702 1 513 | 78 306 1 514 | 152 169 5 515 | 39 256 1 516 | 175 303 
5 517 | 140 150 1 518 | 1 207 1 519 | 189 151 13 520 | 39 5087 1 521 | 4 194 1 522 | 116 306 3 523 | 193 303 1 524 | 108 169 20 525 | 187 207 1 526 | 131 150 1 527 | 71 306 4 528 | 130 169 3 529 | 187 303 2 530 | 134 150 1 531 | 96 174 48 532 | 34 306 1 533 | 92 145 1 534 | 121 174 3 535 | 129 150 1 536 | 108 194 2 537 | 143 174 2 538 | 182 303 2 539 | 102 174 38 540 | 138 256 1 541 | 193 150 1 542 | 122 145 15 543 | 187 150 1 544 | 145 296 1 545 | 123 174 69 546 | 95 174 21 547 | 40 151 2 548 | 101 169 19 549 | 54 151 2 550 | 146 145 2 551 | 174 137 6 552 | 128 306 7 553 | 98 174 1 554 | 52 306 1 555 | 167 137 3 556 | 177 172 1 557 | 125 172 51 558 | 52 296 1 559 | 64 169 4 560 | 174 287 1 561 | 126 150 1 562 | 73 172 8 563 | 160 151 4 564 | 154 137 3 565 | 105 169 7 566 | 115 296 1 567 | 34 1983 1 568 | 91 169 2 569 | 63 169 1 570 | 26 169 3 571 | 42 151 5 572 | 128 150 1 573 | 110 169 8 574 | 138 145 12 575 | 79 169 1 576 | 143 88 2 577 | 85 145 12 578 | 124 306 2 579 | 179 150 1 580 | 96 151 1 581 | 186 151 240 582 | 112 194 3 583 | 85 1720 1 584 | 157 169 4 585 | 122 137 5 586 | 179 303 3 587 | 47 151 3 588 | 126 306 19 589 | 123 151 2 590 | 112 169 8 591 | 77 306 2 592 | 39 145 8 593 | 143 151 93 594 | 104 169 159 595 | 60 151 1 596 | 192 303 1 597 | 183 169 22 598 | 168 145 3 599 | 90 833 8 600 | 125 194 3 601 | 73 169 4 602 | 136 194 2 603 | 96 297 2 604 | 78 169 12 605 | 48 137 1 606 | 104 194 2 607 | 67 145 1 608 | 145 169 1 609 | 125 169 30 610 | 136 169 41 611 | 178 169 3 612 | 123 297 30 613 | 194 145 1 614 | 171 169 1 615 | 156 151 1 616 | 70 169 2 617 | 123 869 1 618 | 6 170 1 619 | 12 151 2 620 | 116 194 3 621 | 181 5568 1 622 | 186 183 1 623 | 93 109 1 624 | 71 169 16 625 | 49 145 1 626 | 143 183 1 627 | 93 447 1 628 | 163 169 13 629 | 107 145 5 630 | 108 306 1 631 | 116 169 107 632 | 6 169 2 633 | 9 151 1 634 | 130 306 6 635 | 1 194 1 636 | 114 169 47 637 | 135 169 47 638 | 82 169 2 639 | 139 169 2 640 | 65 145 1 641 | 140 169 84 642 | 83 137 1 
643 | 52 210 2 644 | 81 145 1 645 | 114 194 2 646 | 153 169 49 647 | 180 303 1 648 | 84 169 6 649 | 30 151 6 650 | 13 151 9 651 | 74 306 2 652 | 175 169 13 653 | 181 169 5 654 | 140 194 2 655 | 188 169 5 656 | 129 169 88 657 | 187 135 2 658 | 193 169 3 659 | 93 169 4 660 | 187 169 2 661 | 148 170 1 662 | 106 169 119 663 | 111 194 1 664 | 148 169 2 665 | 129 194 1 666 | 93 194 1 667 | 29 145 1 668 | 131 169 22 669 | 10 169 1 670 | 5 145 11 671 | 83 66 1 672 | 130 150 1 673 | 32 306 1 674 | 106 194 5 675 | 185 169 1 676 | 111 169 59 677 | 134 169 16 678 | 46 169 3 679 | 79 306 1 680 | 102 57 1 681 | 48 145 3 682 | 94 145 8 683 | 25 151 5 684 | 48 67 1 685 | 26 306 5 686 | 62 169 2 687 | 91 306 4 688 | 67 137 5 689 | 77 169 1 690 | 123 118 3 691 | 184 303 1 692 | 126 169 10 693 | 7 169 1 694 | 80 194 1 695 | 140 833 57 696 | 123 181 1 697 | 166 145 1 698 | 55 169 21 699 | 121 118 2 700 | 96 118 3 701 | 124 169 1 702 | 7 194 1 703 | 49 137 1 704 | 80 169 4 705 | 150 151 6 706 | 107 137 1 707 | 52 169 10 708 | 83 145 9 709 | 159 145 1 710 | 133 306 3 711 | 142 372 115 712 | 13 174 3 713 | 115 169 34 714 | 149 151 1 715 | 43 169 1 716 | 52 170 7 717 | 143 267 1 718 | 57 169 12 719 | 51 145 1 720 | 128 169 2 721 | 90 169 4 722 | 99 169 8 723 | 148 210 1 724 | 47 348 1 725 | 96 5533 1 726 | 100 169 2 727 | 56 145 1 728 | 179 169 2 729 | 53 169 1 730 | 172 151 5 731 | 66 145 4 732 | 29 137 1 733 | 1 9785 1 734 | 161 145 3 735 | 85 169 7 736 | 91 67 5 737 | 79 145 7 738 | 110 145 6 739 | 48 306 2 740 | 39 19 1 741 | 91 145 60 742 | 26 145 4 743 | 79 67 2 744 | 138 194 1 745 | 129 361 1 746 | 103 137 1 747 | 180 256 1 748 | 129 177 1 749 | 39 169 2 750 | 2 145 1 751 | 112 145 2 752 | 157 145 2 753 | 175 1021 1 754 | 131 155 2 755 | 18 151 1 756 | 109 66 1 757 | 147 169 2 758 | 133 145 29 759 | 96 50 1 760 | 83 306 1 761 | 38 137 42 762 | 140 1021 8 763 | 31 256 1 764 | 129 155 1 765 | 123 50 2 766 | 28 148 6 767 | 101 145 1 768 | 105 145 3 769 | 50 151 1 770 | 113 137 1 771 | 
47 383 19 772 | 135 155 1 773 | 1 5776 1 774 | 180 145 3 775 | 96 6991 1 776 | 137 181 1 777 | 103 145 28 778 | 118 169 1 779 | 91 137 1 780 | 155 169 17 781 | 123 136 1 782 | 47 136 920 783 | 122 169 10 784 | 134 55 1 785 | 102 136 1 786 | 124 1021 1 787 | 37 1988 1 788 | 110 66 1 789 | 130 145 63 790 | 129 2291 1 791 | 108 145 6 792 | 38 145 2 793 | 74 145 1 794 | 37 145 9 795 | 1 2854 2 796 | 91 304 1 797 | 187 3294 1 798 | 174 169 1 799 | 104 2291 1 800 | 113 145 13 801 | 132 169 4 802 | 3 145 3 803 | 20 145 3 804 | 119 169 2 805 | 154 169 10 806 | 31 145 2 807 | 132 194 1 808 | 23 151 1 809 | 124 177 1 810 | 109 145 11 811 | 29 306 1 812 | 90 1021 1 813 | 128 145 28 814 | 72 151 1 815 | 51 169 4 816 | 57 145 3 817 | 166 5568 1 818 | 125 304 3 819 | 175 137 1 820 | 43 145 2 821 | 136 304 4 822 | 84 137 129 823 | 148 256 2 824 | 115 145 11 825 | 106 66 1 826 | 103 2859 3 827 | 183 304 1 828 | 159 169 4 829 | 52 145 6 830 | 102 193 2 831 | 83 169 2 832 | 140 137 1 833 | 188 5087 1 834 | 158 169 3 835 | 135 137 1036 836 | 134 66 16 837 | 66 169 1 838 | 131 137 9 839 | 161 169 3 840 | 69 169 1 841 | 114 66 1 842 | 6 304 2 843 | 76 151 1 844 | 24 169 1 845 | 56 169 2 846 | 96 344 3 847 | 179 145 1 848 | 79 273 1 849 | 99 67 1 850 | 93 137 3 851 | 100 145 3 852 | 129 137 1 853 | 95 344 1 854 | 106 137 2 855 | 179 67 1 856 | 123 344 1 857 | 90 145 3 858 | 99 145 11 859 | 187 137 61 860 | 147 303 1 861 | 127 145 1 862 | 6 256 12 863 | 134 2725 1 864 | 94 169 1 865 | 136 137 1 866 | 116 66 1 867 | 103 1021 1 868 | 68 151 2 869 | 137 174 1 870 | 47 313 2 871 | 123 231 1 872 | 46 145 7 873 | 138 306 1 874 | 148 304 3 875 | 136 256 2 876 | 80 145 41 877 | 71 137 2 878 | 131 304 10 879 | 124 145 14 880 | 17 145 1 881 | 41 306 1 882 | 85 296 1 883 | 34 137 1 884 | 166 169 5 885 | 55 145 3 886 | 39 306 1 887 | 1 123 1 888 | 126 145 28 889 | 52 106 1 890 | 77 145 3 891 | 139 67 1 892 | 175 145 25 893 | 181 145 4 894 | 135 67 3 895 | 28 86 1 896 | 84 145 6 897 | 115 137 74 898 
| 153 145 1 899 | 91 177 1 900 | 1 145 47 901 | 65 169 1 902 | 139 145 10 903 | 140 145 174 904 | 96 352 1 905 | 82 145 2 906 | 135 145 97 907 | 114 145 47 908 | 71 9788 1 909 | 185 145 1 910 | 111 145 51 911 | 134 145 213 912 | 128 256 1 913 | 187 67 2 914 | 80 304 2 915 | 93 67 1 916 | 131 145 255 917 | 182 145 2 918 | 148 145 4 919 | 45 177 1 920 | 133 155 1 921 | 106 145 39 922 | 115 66 1 923 | 91 361 1 924 | 134 67 3 925 | 187 145 4 926 | 193 145 8 927 | 57 66 1 928 | 93 145 55 929 | 52 256 1 930 | 26 35 1 931 | 131 67 1 932 | 188 145 6 933 | 129 145 66 934 | 25 136 1 935 | 178 145 2 936 | 28 151 7 937 | 171 145 2 938 | 125 145 87 939 | 136 145 160 940 | 67 169 2 941 | 177 145 12 942 | 145 145 8 943 | 127 137 7 944 | 128 304 1 945 | 78 145 6 946 | 106 106 1 947 | 52 304 27 948 | 73 145 15 949 | 162 151 2 950 | 136 67 1 951 | 183 145 14 952 | 188 1988 1 953 | 168 169 7 954 | 104 145 44 955 | 1 636 1 956 | 6 145 4 957 | 0 3067 1 958 | 34 145 4 959 | 163 145 4 960 | 107 169 20 961 | 116 145 51 962 | 80 137 1 963 | 71 145 19 964 | 134 814 1 965 | 70 145 2 966 | -------------------------------------------------------------------------------- /share/pfp/sigs: -------------------------------------------------------------------------------- 1 | 0 UNK 2 | 1 UNK-ALLC 3 | 2 UNK-ALLC. 4 | 3 UNK-ALLCa 5 | 4 UNK-ALLCb 6 | 5 UNK-ALLCc 7 | 6 UNK-ALLCd 8 | 7 UNK-ALLC-DASH 9 | 8 UNK-ALLC-DASH- 10 | 9 UNK-ALLC-DASH. 
11 | 10 UNK-ALLC-DASHa 12 | 11 UNK-ALLC-DASHb 13 | 12 UNK-ALLC-DASHd 14 | 13 UNK-ALLC-DASH-DIG 15 | 14 UNK-ALLC-DASHe 16 | 15 UNK-ALLC-DASHf 17 | 16 UNK-ALLC-DASHi 18 | 17 UNK-ALLC-DASHm 19 | 18 UNK-ALLC-DASHn 20 | 19 UNK-ALLC-DASHp 21 | 20 UNK-ALLC-DASHr 22 | 21 UNK-ALLC-DASHs 23 | 22 UNK-ALLC-DASHt 24 | 23 UNK-ALLC-DASHv 25 | 24 UNK-ALLC-DASHy 26 | 25 UNK-ALLC-DIG 27 | 26 UNK-ALLCe 28 | 27 UNK-ALLCf 29 | 28 UNK-ALLCg 30 | 29 UNK-ALLCh 31 | 30 UNK-ALLCi 32 | 31 UNK-ALLCk 33 | 32 UNK-ALLCl 34 | 33 UNK-ALLCm 35 | 34 UNK-ALLCn 36 | 35 UNK-ALLCo 37 | 36 UNK-ALLCp 38 | 37 UNK-ALLCr 39 | 38 UNK-ALLCs 40 | 39 UNK-ALLCt 41 | 40 UNK-ALLCv 42 | 41 UNK-ALLCw 43 | 42 UNK-ALLCx 44 | 43 UNK-ALLCy 45 | 44 UNK-ALLCz 46 | 45 UNK-DASH 47 | 46 UNK-DASH-DIG 48 | 47 UNK-DIG 49 | 48 UNK-INIT 50 | 49 UNK-INITa 51 | 50 UNK-INITb 52 | 51 UNK-INITc 53 | 52 UNK-INITd 54 | 53 UNK-INIT-DASHa 55 | 54 UNK-INIT-DASHc 56 | 55 UNK-INIT-DASHd 57 | 56 UNK-INIT-DASH-DIG 58 | 57 UNK-INIT-DASHe 59 | 58 UNK-INIT-DASHf 60 | 59 UNK-INIT-DASHg 61 | 60 UNK-INIT-DASHh 62 | 61 UNK-INIT-DASHk 63 | 62 UNK-INIT-DASHl 64 | 63 UNK-INIT-DASHm 65 | 64 UNK-INIT-DASHn 66 | 65 UNK-INIT-DASHo 67 | 66 UNK-INIT-DASHr 68 | 67 UNK-INIT-DASHs 69 | 68 UNK-INIT-DASHt 70 | 69 UNK-INIT-DASHx 71 | 70 UNK-INIT-DASHy 72 | 71 UNK-INITe 73 | 72 UNK-INITf 74 | 73 UNK-INITg 75 | 74 UNK-INITh 76 | 75 UNK-INITi 77 | 76 UNK-INITj 78 | 77 UNK-INITk 79 | 78 UNK-INITl 80 | 79 UNK-INITm 81 | 80 UNK-INITn 82 | 81 UNK-INITo 83 | 82 UNK-INITp 84 | 83 UNK-INITr 85 | 84 UNK-INITs 86 | 85 UNK-INITt 87 | 86 UNK-INITu 88 | 87 UNK-INITv 89 | 88 UNK-INITw 90 | 89 UNK-INITx 91 | 90 UNK-INITy 92 | 91 UNK-LC 93 | 92 UNK-LC. 
94 | 93 UNK-LCa 95 | 94 UNK-LCb 96 | 95 UNK-LCc 97 | 96 UNK-LCd 98 | 97 UNK-LC-DASH 99 | 98 UNK-LC-DASH- 100 | 99 UNK-LC-DASHa 101 | 100 UNK-LC-DASHb 102 | 101 UNK-LC-DASHc 103 | 102 UNK-LC-DASHd 104 | 103 UNK-LC-DASH-DIG 105 | 104 UNK-LC-DASHe 106 | 105 UNK-LC-DASHf 107 | 106 UNK-LC-DASHg 108 | 107 UNK-LC-DASHh 109 | 108 UNK-LC-DASHk 110 | 109 UNK-LC-DASHl 111 | 110 UNK-LC-DASHm 112 | 111 UNK-LC-DASHn 113 | 112 UNK-LC-DASHo 114 | 113 UNK-LC-DASHp 115 | 114 UNK-LC-DASHr 116 | 115 UNK-LC-DASHs 117 | 116 UNK-LC-DASHt 118 | 117 UNK-LC-DASHu 119 | 118 UNK-LC-DASHv 120 | 119 UNK-LC-DASHw 121 | 120 UNK-LC-DASHx 122 | 121 UNK-LC-DASHy 123 | 122 UNK-LC-DIG 124 | 123 UNK-LCe 125 | 124 UNK-LCf 126 | 125 UNK-LCg 127 | 126 UNK-LCh 128 | 127 UNK-LCi 129 | 128 UNK-LCk 130 | 129 UNK-LCl 131 | 130 UNK-LCm 132 | 131 UNK-LCn 133 | 132 UNK-LCo 134 | 133 UNK-LCp 135 | 134 UNK-LCr 136 | 135 UNK-LCs 137 | 136 UNK-LCt 138 | 137 UNK-LCu 139 | 138 UNK-LCw 140 | 139 UNK-LCx 141 | 140 UNK-LCy 142 | 141 UNK-LCz 143 | 142 UNK-NUM 144 | 143 UNK-UC 145 | 144 UNK-UC. 146 | 145 UNK-UCa 147 | 146 UNK-UCb 148 | 147 UNK-UCc 149 | 148 UNK-UCd 150 | 149 UNK-UC-DASH. 
151 | 150 UNK-UC-DASHa 152 | 151 UNK-UC-DASHb 153 | 152 UNK-UC-DASHc 154 | 153 UNK-UC-DASHd 155 | 154 UNK-UC-DASH-DIG 156 | 155 UNK-UC-DASHe 157 | 156 UNK-UC-DASHf 158 | 157 UNK-UC-DASHg 159 | 158 UNK-UC-DASHh 160 | 159 UNK-UC-DASHi 161 | 160 UNK-UC-DASHk 162 | 161 UNK-UC-DASHl 163 | 162 UNK-UC-DASHm 164 | 163 UNK-UC-DASHn 165 | 164 UNK-UC-DASHo 166 | 165 UNK-UC-DASHp 167 | 166 UNK-UC-DASHr 168 | 167 UNK-UC-DASHs 169 | 168 UNK-UC-DASHt 170 | 169 UNK-UC-DASHu 171 | 170 UNK-UC-DASHv 172 | 171 UNK-UC-DASHx 173 | 172 UNK-UC-DASHy 174 | 173 UNK-UC-DASHz 175 | 174 UNK-UC-DIG 176 | 175 UNK-UCe 177 | 176 UNK-UCf 178 | 177 UNK-UCg 179 | 178 UNK-UCh 180 | 179 UNK-UCi 181 | 180 UNK-UCk 182 | 181 UNK-UCl 183 | 182 UNK-UCm 184 | 183 UNK-UCn 185 | 184 UNK-UCo 186 | 185 UNK-UCp 187 | 186 UNK-UCr 188 | 187 UNK-UCs 189 | 188 UNK-UCt 190 | 189 UNK-UCu 191 | 190 UNK-UCv 192 | 191 UNK-UCw 193 | 192 UNK-UCx 194 | 193 UNK-UCy 195 | 194 UNK-UCz 196 | -------------------------------------------------------------------------------- /src/moost/http/mime_types.cpp: -------------------------------------------------------------------------------- 1 | #include "moost/http/mime_types.hpp" 2 | 3 | using namespace moost::http::mime_types; 4 | 5 | struct mapping 6 | { 7 | const char* extension; 8 | const char* mime_type; 9 | } mappings[] = 10 | { 11 | { "gif", "image/gif" }, 12 | { "htm", "text/html" }, 13 | { "html", "text/html" }, 14 | { "jpg", "image/jpeg" }, 15 | { "png", "image/png" }, 16 | { 0, 0 } // Marks end of list. 
17 | }; 18 | 19 | std::string moost::http::mime_types::extension_to_type(const std::string& extension) 20 | { 21 | for (mapping* m = mappings; m->extension; ++m) 22 | { 23 | if (m->extension == extension) 24 | { 25 | return m->mime_type; 26 | } 27 | } 28 | 29 | return "text/plain"; 30 | } 31 | -------------------------------------------------------------------------------- /src/moost/http/reply.cpp: -------------------------------------------------------------------------------- 1 | #include "moost/http/reply.hpp" 2 | #include 3 | #include 4 | 5 | namespace moost { namespace http { 6 | 7 | namespace status_strings { 8 | 9 | const std::string ok = 10 | "HTTP/1.0 200 OK\r\n"; 11 | const std::string created = 12 | "HTTP/1.0 201 Created\r\n"; 13 | const std::string accepted = 14 | "HTTP/1.0 202 Accepted\r\n"; 15 | const std::string no_content = 16 | "HTTP/1.0 204 No Content\r\n"; 17 | const std::string multiple_choices = 18 | "HTTP/1.0 300 Multiple Choices\r\n"; 19 | const std::string moved_permanently = 20 | "HTTP/1.0 301 Moved Permanently\r\n"; 21 | const std::string moved_temporarily = 22 | "HTTP/1.0 302 Moved Temporarily\r\n"; 23 | const std::string not_modified = 24 | "HTTP/1.0 304 Not Modified\r\n"; 25 | const std::string bad_request = 26 | "HTTP/1.0 400 Bad Request\r\n"; 27 | const std::string unauthorized = 28 | "HTTP/1.0 401 Unauthorized\r\n"; 29 | const std::string forbidden = 30 | "HTTP/1.0 403 Forbidden\r\n"; 31 | const std::string not_found = 32 | "HTTP/1.0 404 Not Found\r\n"; 33 | const std::string internal_server_error = 34 | "HTTP/1.0 500 Internal Server Error\r\n"; 35 | const std::string not_implemented = 36 | "HTTP/1.0 501 Not Implemented\r\n"; 37 | const std::string bad_gateway = 38 | "HTTP/1.0 502 Bad Gateway\r\n"; 39 | const std::string service_unavailable = 40 | "HTTP/1.0 503 Service Unavailable\r\n"; 41 | 42 | boost::asio::const_buffer to_buffer(reply::status_type status) 43 | { 44 | switch (status) 45 | { 46 | case reply::ok: 47 | return 
boost::asio::buffer(ok); 48 | case reply::created: 49 | return boost::asio::buffer(created); 50 | case reply::accepted: 51 | return boost::asio::buffer(accepted); 52 | case reply::no_content: 53 | return boost::asio::buffer(no_content); 54 | case reply::multiple_choices: 55 | return boost::asio::buffer(multiple_choices); 56 | case reply::moved_permanently: 57 | return boost::asio::buffer(moved_permanently); 58 | case reply::moved_temporarily: 59 | return boost::asio::buffer(moved_temporarily); 60 | case reply::not_modified: 61 | return boost::asio::buffer(not_modified); 62 | case reply::bad_request: 63 | return boost::asio::buffer(bad_request); 64 | case reply::unauthorized: 65 | return boost::asio::buffer(unauthorized); 66 | case reply::forbidden: 67 | return boost::asio::buffer(forbidden); 68 | case reply::not_found: 69 | return boost::asio::buffer(not_found); 70 | case reply::internal_server_error: 71 | return boost::asio::buffer(internal_server_error); 72 | case reply::not_implemented: 73 | return boost::asio::buffer(not_implemented); 74 | case reply::bad_gateway: 75 | return boost::asio::buffer(bad_gateway); 76 | case reply::service_unavailable: 77 | return boost::asio::buffer(service_unavailable); 78 | default: 79 | return boost::asio::buffer(internal_server_error); 80 | } 81 | } 82 | 83 | } // status_strings 84 | 85 | namespace misc_strings { 86 | 87 | const char name_value_separator[] = { ':', ' ' }; 88 | const char crlf[] = { '\r', '\n' }; 89 | 90 | } // misc_strings 91 | 92 | std::vector reply::to_buffers() 93 | { 94 | std::vector buffers; 95 | buffers.push_back(status_strings::to_buffer(status)); 96 | for (std::size_t i = 0; i < headers.size(); ++i) 97 | { 98 | header& h = headers[i]; 99 | buffers.push_back(boost::asio::buffer(h.name)); 100 | buffers.push_back(boost::asio::buffer(misc_strings::name_value_separator)); 101 | buffers.push_back(boost::asio::buffer(h.value)); 102 | buffers.push_back(boost::asio::buffer(misc_strings::crlf)); 103 | } 104 | 
buffers.push_back(boost::asio::buffer(misc_strings::crlf)); 105 | buffers.push_back(boost::asio::buffer(content)); 106 | return buffers; 107 | } 108 | 109 | namespace stock_replies { 110 | 111 | const char ok[] = ""; 112 | const char created[] = 113 | "" 114 | "Created" 115 | "

201 Created

" 116 | ""; 117 | const char accepted[] = 118 | "" 119 | "Accepted" 120 | "

202 Accepted

" 121 | ""; 122 | const char no_content[] = 123 | "" 124 | "No Content" 125 | "

204 No Content

" 126 | ""; 127 | const char multiple_choices[] = 128 | "" 129 | "Multiple Choices" 130 | "

300 Multiple Choices

" 131 | ""; 132 | const char moved_permanently[] = 133 | "" 134 | "Moved Permanently" 135 | "

301 Moved Permanently

" 136 | ""; 137 | const char moved_temporarily[] = 138 | "" 139 | "Moved Temporarily" 140 | "

302 Moved Temporarily

" 141 | ""; 142 | const char not_modified[] = 143 | "" 144 | "Not Modified" 145 | "

304 Not Modified

" 146 | ""; 147 | const char bad_request[] = 148 | "" 149 | "Bad Request" 150 | "

400 Bad Request

" 151 | ""; 152 | const char unauthorized[] = 153 | "" 154 | "Unauthorized" 155 | "

401 Unauthorized

" 156 | ""; 157 | const char forbidden[] = 158 | "" 159 | "Forbidden" 160 | "

403 Forbidden

" 161 | ""; 162 | const char not_found[] = 163 | "" 164 | "Not Found" 165 | "

404 Not Found

" 166 | ""; 167 | const char internal_server_error[] = 168 | "" 169 | "Internal Server Error" 170 | "

500 Internal Server Error

" 171 | ""; 172 | const char not_implemented[] = 173 | "" 174 | "Not Implemented" 175 | "

501 Not Implemented

" 176 | ""; 177 | const char bad_gateway[] = 178 | "" 179 | "Bad Gateway" 180 | "

502 Bad Gateway

" 181 | ""; 182 | const char service_unavailable[] = 183 | "" 184 | "Service Unavailable" 185 | "

503 Service Unavailable

" 186 | ""; 187 | 188 | std::string to_string(reply::status_type status) 189 | { 190 | switch (status) 191 | { 192 | case reply::ok: 193 | return ok; 194 | case reply::created: 195 | return created; 196 | case reply::accepted: 197 | return accepted; 198 | case reply::no_content: 199 | return no_content; 200 | case reply::multiple_choices: 201 | return multiple_choices; 202 | case reply::moved_permanently: 203 | return moved_permanently; 204 | case reply::moved_temporarily: 205 | return moved_temporarily; 206 | case reply::not_modified: 207 | return not_modified; 208 | case reply::bad_request: 209 | return bad_request; 210 | case reply::unauthorized: 211 | return unauthorized; 212 | case reply::forbidden: 213 | return forbidden; 214 | case reply::not_found: 215 | return not_found; 216 | case reply::internal_server_error: 217 | return internal_server_error; 218 | case reply::not_implemented: 219 | return not_implemented; 220 | case reply::bad_gateway: 221 | return bad_gateway; 222 | case reply::service_unavailable: 223 | return service_unavailable; 224 | default: 225 | return internal_server_error; 226 | } 227 | } 228 | 229 | } // stock_replies 230 | 231 | reply reply::stock_reply(reply::status_type status) 232 | { 233 | reply rep; 234 | rep.status = status; 235 | rep.content = stock_replies::to_string(status); 236 | rep.headers.resize(2); 237 | rep.headers[0].name = "Content-Length"; 238 | rep.headers[0].value = boost::lexical_cast(rep.content.size()); 239 | rep.headers[1].name = "Content-Type"; 240 | rep.headers[1].value = "text/html"; 241 | return rep; 242 | } 243 | 244 | }} // moost::http 245 | -------------------------------------------------------------------------------- /src/moost/http/request_parser.cpp: -------------------------------------------------------------------------------- 1 | #include "moost/http/request_parser.hpp" 2 | #include "moost/http/request.hpp" 3 | 4 | #include 5 | 6 | using namespace moost::http; 7 | 8 | request_parser::request_parser() 
9 | : state_(method_start), 10 | content_to_read_(0) 11 | { 12 | } 13 | 14 | void request_parser::reset() 15 | { 16 | state_ = method_start; 17 | content_to_read_ = 0; 18 | } 19 | 20 | boost::tribool request_parser::parse_content_length(request & req) 21 | { 22 | std::vector
::iterator it = req.find_header("Content-Length"); 23 | 24 | if (it == req.headers.end()) 25 | return true; 26 | try 27 | { 28 | content_to_read_ = boost::lexical_cast(it->value); 29 | } 30 | catch (boost::bad_lexical_cast &) 31 | { 32 | return false; 33 | } 34 | return true; 35 | } 36 | 37 | boost::tribool request_parser::consume_header(request& req, char input) 38 | { 39 | switch (state_) 40 | { 41 | case method_start: 42 | if (!is_char(input) || is_ctl(input) || is_tspecial(input)) 43 | { 44 | return false; 45 | } 46 | else 47 | { 48 | state_ = method; 49 | req.method.push_back(input); 50 | return boost::indeterminate; 51 | } 52 | case method: 53 | if (input == ' ') 54 | { 55 | state_ = uri; 56 | return boost::indeterminate; 57 | } 58 | else if (!is_char(input) || is_ctl(input) || is_tspecial(input)) 59 | { 60 | return false; 61 | } 62 | else 63 | { 64 | req.method.push_back(input); 65 | return boost::indeterminate; 66 | } 67 | case uri_start: 68 | if (is_ctl(input)) 69 | { 70 | return false; 71 | } 72 | else 73 | { 74 | state_ = uri; 75 | req.uri.push_back(input); 76 | return boost::indeterminate; 77 | } 78 | case uri: 79 | if (input == ' ') 80 | { 81 | state_ = http_version_h; 82 | return boost::indeterminate; 83 | } 84 | else if (is_ctl(input)) 85 | { 86 | return false; 87 | } 88 | else 89 | { 90 | req.uri.push_back(input); 91 | return boost::indeterminate; 92 | } 93 | case http_version_h: 94 | if (input == 'H') 95 | { 96 | state_ = http_version_t_1; 97 | return boost::indeterminate; 98 | } 99 | else 100 | { 101 | return false; 102 | } 103 | case http_version_t_1: 104 | if (input == 'T') 105 | { 106 | state_ = http_version_t_2; 107 | return boost::indeterminate; 108 | } 109 | else 110 | { 111 | return false; 112 | } 113 | case http_version_t_2: 114 | if (input == 'T') 115 | { 116 | state_ = http_version_p; 117 | return boost::indeterminate; 118 | } 119 | else 120 | { 121 | return false; 122 | } 123 | case http_version_p: 124 | if (input == 'P') 125 | { 126 | 
state_ = http_version_slash; 127 | return boost::indeterminate; 128 | } 129 | else 130 | { 131 | return false; 132 | } 133 | case http_version_slash: 134 | if (input == '/') 135 | { 136 | req.http_version_major = 0; 137 | req.http_version_minor = 0; 138 | state_ = http_version_major_start; 139 | return boost::indeterminate; 140 | } 141 | else 142 | { 143 | return false; 144 | } 145 | case http_version_major_start: 146 | if (is_digit(input)) 147 | { 148 | req.http_version_major = req.http_version_major * 10 + input - '0'; 149 | state_ = http_version_major; 150 | return boost::indeterminate; 151 | } 152 | else 153 | { 154 | return false; 155 | } 156 | case http_version_major: 157 | if (input == '.') 158 | { 159 | state_ = http_version_minor_start; 160 | return boost::indeterminate; 161 | } 162 | else if (is_digit(input)) 163 | { 164 | req.http_version_major = req.http_version_major * 10 + input - '0'; 165 | return boost::indeterminate; 166 | } 167 | else 168 | { 169 | return false; 170 | } 171 | case http_version_minor_start: 172 | if (is_digit(input)) 173 | { 174 | req.http_version_minor = req.http_version_minor * 10 + input - '0'; 175 | state_ = http_version_minor; 176 | return boost::indeterminate; 177 | } 178 | else 179 | { 180 | return false; 181 | } 182 | case http_version_minor: 183 | if (input == '\r') 184 | { 185 | state_ = expecting_newline_1; 186 | return boost::indeterminate; 187 | } 188 | else if (is_digit(input)) 189 | { 190 | req.http_version_minor = req.http_version_minor * 10 + input - '0'; 191 | return boost::indeterminate; 192 | } 193 | else 194 | { 195 | return false; 196 | } 197 | case expecting_newline_1: 198 | if (input == '\n') 199 | { 200 | state_ = header_line_start; 201 | return boost::indeterminate; 202 | } 203 | else 204 | { 205 | return false; 206 | } 207 | case header_line_start: 208 | if (input == '\r') 209 | { 210 | state_ = expecting_newline_3; 211 | return boost::indeterminate; 212 | } 213 | else if (!req.headers.empty() && (input 
== ' ' || input == '\t')) 214 | { 215 | state_ = header_lws; 216 | return boost::indeterminate; 217 | } 218 | else if (!is_char(input) || is_ctl(input) || is_tspecial(input)) 219 | { 220 | return false; 221 | } 222 | else 223 | { 224 | req.headers.push_back(header()); 225 | req.headers.back().name.push_back(input); 226 | state_ = header_name; 227 | return boost::indeterminate; 228 | } 229 | case header_lws: 230 | if (input == '\r') 231 | { 232 | state_ = expecting_newline_2; 233 | return boost::indeterminate; 234 | } 235 | else if (input == ' ' || input == '\t') 236 | { 237 | return boost::indeterminate; 238 | } 239 | else if (is_ctl(input)) 240 | { 241 | return false; 242 | } 243 | else 244 | { 245 | state_ = header_value; 246 | req.headers.back().value.push_back(input); 247 | return boost::indeterminate; 248 | } 249 | case header_name: 250 | if (input == ':') 251 | { 252 | state_ = space_before_header_value; 253 | return boost::indeterminate; 254 | } 255 | else if (!is_char(input) || is_ctl(input) || is_tspecial(input)) 256 | { 257 | return false; 258 | } 259 | else 260 | { 261 | req.headers.back().name.push_back(input); 262 | return boost::indeterminate; 263 | } 264 | case space_before_header_value: 265 | if (input == ' ') 266 | { 267 | state_ = header_value; 268 | return boost::indeterminate; 269 | } 270 | else 271 | { 272 | return false; 273 | } 274 | case header_value: 275 | if (input == '\r') 276 | { 277 | state_ = expecting_newline_2; 278 | return boost::indeterminate; 279 | } 280 | else if (is_ctl(input)) 281 | { 282 | return false; 283 | } 284 | else 285 | { 286 | req.headers.back().value.push_back(input); 287 | return boost::indeterminate; 288 | } 289 | case expecting_newline_2: 290 | if (input == '\n') 291 | { 292 | state_ = header_line_start; 293 | return boost::indeterminate; 294 | } 295 | else 296 | { 297 | return false; 298 | } 299 | case expecting_newline_3: 300 | if (input == '\n') 301 | { 302 | state_ = header_end; 303 | return 
parse_content_length(req); // as a final step, parse the content length 304 | } 305 | else 306 | { 307 | return false; 308 | } 309 | default: 310 | return false; 311 | } 312 | } 313 | 314 | bool request_parser::is_char(int c) 315 | { 316 | return c >= 0 && c <= 127; 317 | } 318 | 319 | bool request_parser::is_ctl(int c) 320 | { 321 | return (c >= 0 && c <= 31) || c == 127; 322 | } 323 | 324 | bool request_parser::is_tspecial(int c) 325 | { 326 | switch (c) 327 | { 328 | case '(': case ')': case '<': case '>': case '@': 329 | case ',': case ';': case ':': case '\\': case '"': 330 | case '/': case '[': case ']': case '?': case '=': 331 | case '{': case '}': case ' ': case '\t': 332 | return true; 333 | default: 334 | return false; 335 | } 336 | } 337 | 338 | bool request_parser::is_digit(int c) 339 | { 340 | return c >= '0' && c <= '9'; 341 | } 342 | -------------------------------------------------------------------------------- /src/pfp/config.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace com::wavii::pfp; 4 | 5 | const char * consts::version = "0.1"; 6 | const float consts::score_resolution = 50.0f; 7 | const score_t consts::empty_score = -32768; 8 | const count_t consts::smooth_threshold = 100; 9 | const float consts::word_smooth_factor = 0.2f; 10 | const float consts::sig_smooth_factor = 1.0f; 11 | const state_t consts::goal_state = 411; 12 | const state_t consts::boundary_state = 412; 13 | const score_t consts::epsilon = 0; 14 | -------------------------------------------------------------------------------- /src/pfpc/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace com::wavii::pfp; 19 | using namespace boost; 20 | namespace 
fs = boost::filesystem; 21 | 22 | template 23 | void load(T & obj, boost::filesystem::path p) 24 | { 25 | if (!fs::exists(p)) 26 | throw std::runtime_error("can't find " + p.string()); 27 | std::ifstream in(p.string().c_str()); 28 | obj.load(in); 29 | } 30 | 31 | int main(int argc, char * argv[]) 32 | { 33 | std::clog << "pfpc: command line interface for pfp!" << std::endl; 34 | std::clog << "build: " << __DATE__ << " (" << __TIME__ << ") of pfp version " << consts::version << " (c) Wavii,Inc. 2010" << std::endl; 35 | std::clog << "usage: " << argv[0] << " " << std::endl; 36 | 37 | size_t sentence_length = argc < 2 ? 45 : lexical_cast(argv[1]); 38 | std::string data_dir = argc < 3 ? "/usr/share/pfp/" : argv[2]; // make install copies files to /usr/share/pfp by default 39 | 40 | tokenizer tokenizer; 41 | state_list states; 42 | lexicon lexicon(states); 43 | unary_grammar ug(states); 44 | binary_grammar bg(states); 45 | pcfg_parser pcfg(states, ug, bg); 46 | 47 | std::clog << "loading lexicon and grammar" << std::endl; 48 | load(tokenizer, fs::path(data_dir) / "americanizations"); 49 | load(states, fs::path(data_dir) / "states"); 50 | { 51 | fs::path ps[] = { fs::path(data_dir) / "words", fs::path(data_dir) / "sigs", fs::path(data_dir) / "word_state", fs::path(data_dir) / "sig_state" }; 52 | std::ifstream ins[4]; 53 | for (int i = 0; i != 4; ++i) 54 | { 55 | if (!fs::exists(ps[i])) 56 | throw std::runtime_error("can't find " + ps[i].string()); 57 | ins[i].open(ps[i].string().c_str()); 58 | } 59 | lexicon.load(ins[0], ins[1], ins[2], ins[3]); 60 | } 61 | load(ug, fs::path(data_dir) / "unary_rules"); 62 | load(bg, fs::path(data_dir) / "binary_rules"); 63 | workspace w(sentence_length, states.size()); 64 | 65 | std::clog << "ready! 
enter lines to parse:" << std::endl; 66 | for (std::string sentence; std::getline(std::cin, sentence); ) 67 | { 68 | std::vector< std::string > words; 69 | std::vector< std::pair< state_t, float > > state_weight; 70 | std::vector< std::vector< state_score_t > > sentence_f; 71 | node result; 72 | tokenizer.tokenize(sentence, words); 73 | for (std::vector< std::string >::const_iterator it = words.begin(); it != words.end(); ++it) 74 | { 75 | state_weight.clear(); lexicon.score(*it, std::back_inserter(state_weight)); 76 | sentence_f.push_back(std::vector< state_score_t >(state_weight.size())); 77 | // scale by score_resolution in case we are downcasting our weights 78 | for (size_t i = 0; i != state_weight.size(); ++i) 79 | sentence_f.back()[i] = state_score_t(state_weight[i].first, state_weight[i].second * consts::score_resolution); 80 | } 81 | // add the boundary symbol 82 | sentence_f.push_back( std::vector< state_score_t >(1, state_score_t(consts::boundary_state, 0.0f))); 83 | // and parse! 
84 | if (!pcfg.parse(sentence_f, w, result)) 85 | std::cout << std::endl; 86 | // stitch together the results 87 | std::ostringstream oss; 88 | std::vector< std::string >::iterator word_it = words.begin(); 89 | stitch(oss, result, word_it, states); 90 | std::cout << oss.str() << std::endl; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/pfpc/pfpc_token.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace com::wavii::pfp; 19 | using namespace boost; 20 | namespace fs = boost::filesystem; 21 | 22 | template 23 | void load(T & obj, boost::filesystem::path p) 24 | { 25 | if (!fs::exists(p)) 26 | throw std::runtime_error("can't find " + p.string()); 27 | std::ifstream in(p.string().c_str()); 28 | obj.load(in); 29 | } 30 | 31 | int main(int argc, char * argv[]) 32 | { 33 | std::clog << "pfpc_token: command line interface for pfp!" << std::endl; 34 | std::clog << "build: " << __DATE__ << " (" << __TIME__ << ") of pfp version " << consts::version << " (c) Wavii,Inc. 2012" << std::endl; 35 | std::clog << "usage: " << argv[0] << " " << std::endl; 36 | 37 | size_t sentence_length = argc < 2 ? 45 : lexical_cast(argv[1]); 38 | std::string data_dir = argc < 3 ? 
"/usr/share/pfp/" : argv[2]; // make install copies files to /usr/share/pfp by default 39 | 40 | state_list states; 41 | lexicon lexicon(states); 42 | unary_grammar ug(states); 43 | binary_grammar bg(states); 44 | pcfg_parser pcfg(states, ug, bg); 45 | 46 | std::clog << "loading lexicon and grammar" << std::endl; 47 | load(states, fs::path(data_dir) / "states"); 48 | { 49 | fs::path ps[] = { fs::path(data_dir) / "words", fs::path(data_dir) / "sigs", fs::path(data_dir) / "word_state", fs::path(data_dir) / "sig_state" }; 50 | std::ifstream ins[4]; 51 | for (int i = 0; i != 4; ++i) 52 | { 53 | if (!fs::exists(ps[i])) 54 | throw std::runtime_error("can't find " + ps[i].string()); 55 | ins[i].open(ps[i].string().c_str()); 56 | } 57 | lexicon.load(ins[0], ins[1], ins[2], ins[3]); 58 | } 59 | load(ug, fs::path(data_dir) / "unary_rules"); 60 | load(bg, fs::path(data_dir) / "binary_rules"); 61 | workspace w(sentence_length, states.size()); 62 | 63 | std::vector< std::string > words; 64 | std::clog << "ready! 
enter each token per line, empty line to finish the sentence:" << std::endl; 65 | for (std::string word; std::getline(std::cin, word); ) { 66 | boost::trim(word); 67 | if (word.empty()) 68 | break; 69 | words.push_back(word); 70 | } 71 | 72 | std::vector< std::pair< state_t, float > > state_weight; 73 | std::vector< std::vector< state_score_t > > sentence_f; 74 | node result; 75 | 76 | for (std::vector< std::string >::const_iterator it = words.begin(); it != words.end(); ++it) 77 | { 78 | state_weight.clear(); 79 | lexicon.score(*it, std::back_inserter(state_weight)); 80 | sentence_f.push_back(std::vector< state_score_t >(state_weight.size())); 81 | // scale by score_resolution in case we are downcasting our weights 82 | for (size_t i = 0; i != state_weight.size(); ++i) 83 | sentence_f.back()[i] = state_score_t(state_weight[i].first, state_weight[i].second * consts::score_resolution); 84 | } 85 | // add the boundary symbol 86 | sentence_f.push_back( std::vector< state_score_t >(1, state_score_t(consts::boundary_state, 0.0f))); 87 | // and parse! 88 | if (!pcfg.parse(sentence_f, w, result)) 89 | std::cout << "Sorry, pfp couldn't work out the result!" << std::endl; 90 | // stitch together the results 91 | std::ostringstream oss; 92 | std::vector< std::string >::iterator word_it = words.begin(); 93 | stitch(oss, result, word_it, states); 94 | std::cout << oss.str() << std::endl; 95 | } 96 | -------------------------------------------------------------------------------- /src/pfpd/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | using namespace com::wavii::pfp; 9 | using namespace boost; 10 | using namespace moost; 11 | 12 | int main(int argc, char * argv[]) 13 | { 14 | std::clog << "pfpd: http server for pfp!" << std::endl; 15 | std::clog << "build: " << __DATE__ << " (" << __TIME__ << ") of pfp version " << consts::version << " (c) Wavii,Inc. 
2010" << std::endl; 16 | 17 | if (argc < 3) 18 | { 19 | std::cerr << "usage: " << argv[0] << " " << std::endl; 20 | exit(1); 21 | } 22 | std::string host = argv[1]; 23 | int port = lexical_cast(argv[2]); 24 | size_t sentence_length = argc < 4 ? 45 : lexical_cast(argv[3]); 25 | size_t threads = argc < 5 ? 1 : lexical_cast(argv[4]); 26 | std::string data_dir = argc < 6 ? "/usr/share/pfp/" : argv[5]; // make install copies files to /usr/share/pfp by default 27 | 28 | http::server server(host, port, threads); 29 | try 30 | { 31 | server.request_handler().init(sentence_length, threads, data_dir); 32 | } catch (const std::runtime_error & e) 33 | { 34 | std::cerr << "error: " << e.what() << std::endl; 35 | return 1; 36 | } 37 | std::clog << "starting http service." << std::endl; 38 | server.run(); // our blocking event loop 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /src/pfpd/pfpd_handler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace com::wavii::pfp; 14 | using namespace moost::http; 15 | namespace fs = boost::filesystem; 16 | 17 | pfpd_handler::pfpd_handler() 18 | : lexicon_(states_), 19 | ug_(states_), 20 | bg_(states_), 21 | pcfg_(states_, ug_, bg_) 22 | { 23 | } 24 | 25 | // load a file and err reasonably if not found 26 | template 27 | void load(T & obj, boost::filesystem::path p) 28 | { 29 | if (!boost::filesystem::exists(p)) 30 | throw std::runtime_error("can't find " + p.string()); 31 | std::clog << "loading " << p.string() << std::endl; 32 | std::ifstream in(p.string().c_str()); 33 | obj.load(in); 34 | } 35 | 36 | void pfpd_handler::init(size_t sentence_length, size_t threads, const std::string & data_dir) 37 | { 38 | timer_bucket_size_ = (sentence_length + 9) / 10; 39 | std::clog << "loading lexicon and 
grammar" << std::endl; 40 | load(tokenizer_, fs::path(data_dir) / "americanizations"); 41 | load(states_, fs::path(data_dir) / "states"); 42 | { 43 | fs::path ps[] = { fs::path(data_dir) / "words", fs::path(data_dir) / "sigs", fs::path(data_dir) / "word_state", fs::path(data_dir) / "sig_state" }; 44 | std::ifstream ins[4]; 45 | for (int i = 0; i != 4; ++i) 46 | { 47 | if (!fs::exists(ps[i])) 48 | throw std::runtime_error("can't find " + ps[i].string()); 49 | ins[i].open(ps[i].string().c_str()); 50 | } 51 | lexicon_.load(ins[0], ins[1], ins[2], ins[3]); 52 | } 53 | load(ug_, fs::path(data_dir) / "unary_rules"); 54 | load(bg_, fs::path(data_dir) / "binary_rules"); 55 | std::clog << "allocating " << threads << " workspaces of sentence-length " << sentence_length << std::endl; 56 | while (threads-- != 0) 57 | workspaces_.add_resource(new workspace(sentence_length, states_.size())); 58 | } 59 | 60 | bool pfpd_handler::url_decode(const std::string& in, std::string& out) 61 | { 62 | // TODO: URL-encoding maps to ISO-9somethingsomething codepage 63 | // do i need to convert to utf-8 here or is everything kosher? 
64 | out.clear(); 65 | out.reserve(in.size()); 66 | for (std::size_t i = 0; i < in.size(); ++i) 67 | { 68 | if (in[i] == '%') 69 | { 70 | if (i + 3 <= in.size()) 71 | { 72 | int value; 73 | if (std::istringstream(in.substr(i + 1, 2)) >> std::hex >> value) 74 | { 75 | out += static_cast(value); 76 | i += 2; 77 | } 78 | else 79 | return false; 80 | } 81 | else 82 | return false; 83 | } 84 | else if (in[i] == '+') 85 | out += ' '; 86 | else 87 | out += in[i]; 88 | } 89 | return true; 90 | } 91 | 92 | void pfpd_handler::handle_request(const moost::http::request& req, moost::http::reply& rep) 93 | { 94 | std::string request_path; 95 | if (!url_decode(req.uri, request_path)) 96 | { 97 | rep = reply::stock_reply(reply::bad_request); 98 | return; 99 | } 100 | 101 | rep.headers.resize(2); 102 | rep.headers[0].name = "Content-Length"; 103 | rep.headers[1].name = "Content-Type"; 104 | rep.headers[1].value = "text/plain"; 105 | 106 | try 107 | { 108 | rep.status = reply::ok; 109 | if (request_path == "/version" || request_path == "/version/") 110 | rep.content = version(); 111 | else if (request_path.find("/console") == 0) 112 | { 113 | rep.content = console( request_path.substr(sizeof("/console") - 1) ); 114 | rep.headers[1].value = "text/html"; 115 | } 116 | else if (request_path.find("/parse/") == 0) 117 | rep.content = parse(request_path.substr(sizeof("/parse/") - 1)); 118 | else 119 | rep = reply::stock_reply(reply::not_found); 120 | } catch (const std::runtime_error & e) 121 | { 122 | std::cerr << "error: " << e.what() << std::endl; 123 | rep.content = ""; 124 | } 125 | 126 | // and finally the length 127 | rep.headers[0].value = boost::lexical_cast(rep.content.size()); 128 | } 129 | 130 | std::string pfpd_handler::version() 131 | { 132 | const char * e[] = 133 | { 134 | "sometimes when i freestyle, i lose confidence.", 135 | "i do not have any money so am sending you this drawing i did of a spider instead.", 136 | "deleted code is debugged code.", 137 | "the computing 
scientist's main challenge is not to get confused by the complexities of his own making.", 138 | "debugging is twice as hard as writing the code in the first place. therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it.", 139 | "censorship is telling a man he can't have a steak just because a baby can't chew it.", 140 | "whenever you find yourself on the side of the majority, it is time to pause and reflect.", 141 | "if you don't know where you're going, any road will get you there.", 142 | "if you ever drop your keys into a river of molten lava, let'em go, because, man, they're gone.", 143 | "it takes a big man to cry, but it takes a bigger man to laugh at that man.", 144 | "consider the daffodil. and while you're doing that, i'll be over here, looking through your stuff." 145 | }; 146 | std::ostringstream oss; 147 | oss << "pfp version " << consts::version << ", build: " << __DATE__ << " (" << __TIME__ << ")"; 148 | oss << "\n\n" << e[rand() % (sizeof(e) / sizeof(const char *))]; 149 | return oss.str(); 150 | } 151 | 152 | std::string pfpd_handler::console(const std::string & query) 153 | { 154 | std::ostringstream oss; 155 | std::string q; 156 | if (query.find("?q=") != std::string::npos) 157 | q = boost::algorithm::trim_copy(query.substr(query.find("?q=") + 3)); 158 | oss << "\npfp console\n

Hey everybody let's parse!

\n
"; 159 | oss << "
\n
\n"; 162 | if (!q.empty()) 163 | { 164 | std::string o; 165 | try { o = parse(q); } 166 | catch (const std::runtime_error & e) { o = std::string("whoops: ") + e.what(); } 167 | oss << "

result:

" << o << std::endl; 168 | } 169 | oss << "\n"; 170 | return oss.str(); 171 | } 172 | 173 | std::string pfpd_handler::parse(const std::string & sentence) 174 | { 175 | // befirst, get a workspace 176 | resource_stack::scoped_resource pw(workspaces_); 177 | // now some words 178 | std::vector< std::string > words; 179 | std::vector< std::pair< state_t, float > > state_weight; 180 | std::vector< std::vector< state_score_t > > sentence_f; 181 | node result; 182 | tokenizer_.tokenize(sentence, words); 183 | for (std::vector< std::string >::const_iterator it = words.begin(); it != words.end(); ++it) 184 | { 185 | state_weight.clear(); lexicon_.score(*it, std::back_inserter(state_weight)); 186 | sentence_f.push_back(std::vector< state_score_t >(state_weight.size())); 187 | // scale by score_resolution in case we are downcasting our weights 188 | for (size_t i = 0; i != state_weight.size(); ++i) 189 | sentence_f.back()[i] = state_score_t(state_weight[i].first, state_weight[i].second * consts::score_resolution); 190 | } 191 | // add the boundary symbol 192 | sentence_f.push_back( std::vector< state_score_t >(1, state_score_t(consts::boundary_state, 0.0f))); 193 | // and parse! 
194 | if (!pcfg_.parse(sentence_f, *pw, result)) 195 | return ""; 196 | // stitch together the results 197 | std::ostringstream oss; 198 | stitch(oss, result, words.begin(), states_); 199 | return oss.str(); 200 | } 201 | 202 | -------------------------------------------------------------------------------- /src/pypfp/pypfp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace com::wavii::pfp; 9 | using namespace boost::python; 10 | namespace fs = boost::filesystem; 11 | 12 | template 13 | void load(T & obj, fs::path p) 14 | { 15 | if (!fs::exists(p)) 16 | throw std::runtime_error("can't find " + p.string()); 17 | std::ifstream in(p.string().c_str()); 18 | obj.load(in); 19 | } 20 | 21 | pypfp::pypfp() 22 | : lexicon_(states_), 23 | ug_(states_), 24 | bg_(states_), 25 | pcfg_(states_, ug_, bg_) 26 | { 27 | init(); 28 | } 29 | 30 | pypfp::pypfp(size_t sentence_length) 31 | : lexicon_(states_), 32 | ug_(states_), 33 | bg_(states_), 34 | pcfg_(states_, ug_, bg_) 35 | { 36 | init(sentence_length); 37 | } 38 | 39 | pypfp::pypfp(size_t sentence_length, const std::string & data_dir) 40 | : lexicon_(states_), 41 | ug_(states_), 42 | bg_(states_), 43 | pcfg_(states_, ug_, bg_) 44 | { 45 | init(sentence_length, data_dir); 46 | } 47 | 48 | void pypfp::init(size_t sentence_length /*= 45*/, const std::string & data_dir /*= ""*/) 49 | { 50 | std::string pfp_path = extract(import("pfp").attr("__file__")); 51 | fs::path data_dir_p = data_dir.empty() ? 
fs::path(pfp_path).parent_path() / "share" : data_dir; 52 | 53 | load(tokenizer_, data_dir_p / "americanizations"); 54 | load(states_, data_dir_p / "states"); 55 | { 56 | fs::path ps[] = { data_dir_p / "words", data_dir_p / "sigs", data_dir_p / "word_state", data_dir_p / "sig_state" }; 57 | std::ifstream ins[4]; 58 | for (int i = 0; i != 4; ++i) 59 | { 60 | if (!fs::exists(ps[i])) 61 | throw std::runtime_error("can't find " + ps[i].string()); 62 | ins[i].open(ps[i].string().c_str()); 63 | } 64 | lexicon_.load(ins[0], ins[1], ins[2], ins[3]); 65 | } 66 | load(ug_, data_dir_p / "unary_rules"); 67 | load(bg_, data_dir_p / "binary_rules"); 68 | pworkspace_.reset(new workspace(sentence_length, states_.size())); 69 | } 70 | 71 | std::string pypfp::_parse_tokens(const std::vector& words) 72 | { 73 | std::vector< std::pair< state_t, float > > state_weight; 74 | std::vector< std::vector< state_score_t > > sentence_f; 75 | node result; 76 | 77 | for (std::vector< std::string >::const_iterator it = words.begin(); it != words.end(); ++it) 78 | { 79 | state_weight.clear(); lexicon_.score(*it, std::back_inserter(state_weight)); 80 | sentence_f.push_back(std::vector< state_score_t >(state_weight.size())); 81 | // scale by score_resolution in case we are downcasting our weights 82 | for (size_t i = 0; i != state_weight.size(); ++i) 83 | sentence_f.back()[i] = state_score_t(state_weight[i].first, state_weight[i].second * consts::score_resolution); 84 | } 85 | // add the boundary symbol 86 | sentence_f.push_back( std::vector< state_score_t >(1, state_score_t(consts::boundary_state, 0.0f))); 87 | // and parse! 
88 | if (!pcfg_.parse(sentence_f, *pworkspace_, result)) 89 | return ""; 90 | // stitch together the results 91 | std::ostringstream oss; 92 | stitch(oss, result, words.begin(), states_); 93 | return oss.str(); 94 | } 95 | 96 | std::string pypfp::parse_tokens(const boost::python::list& words) 97 | { 98 | std::vector words_vec; 99 | size_t len = boost::python::len(words); 100 | 101 | // makes a copy of the content. Yes, I could avoid it by iterating but then 102 | // I'd have to use stl_iterator and make parse_tokens templated. 103 | // It's not really worth the hassle. 104 | for (size_t i = 0; i != len; ++i) 105 | words_vec.push_back(boost::python::extract(words[i])); 106 | 107 | return _parse_tokens(words_vec); 108 | } 109 | 110 | std::string pypfp::parse(const std::string & sentence) 111 | { 112 | // now some words 113 | std::vector< std::string > words; 114 | tokenizer_.tokenize(sentence, words); 115 | return _parse_tokens(words); 116 | } 117 | 118 | BOOST_PYTHON_MODULE(pfp) 119 | { 120 | class_("Parser", init<>()) 121 | .def(init(boost::python::args("max_sentence_len"))) 122 | .def(init(boost::python::args("max_sentence_len", "data_dir"))) 123 | .def("parse", &pypfp::parse, boost::python::args("self", "sentence"), 124 | "Will parse the given sentence") 125 | .def("parse_tokens", &pypfp::parse_tokens, boost::python::args("self", "tokens"), 126 | "Will parse the give tokens list") 127 | ; 128 | } 129 | -------------------------------------------------------------------------------- /src/test/lexicon.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace com::wavii::pfp; 13 | 14 | struct lexicon_test_fixture 15 | { 16 | std::vector< std::pair< state_t, float > > scores; 17 | state_list states; 18 | lexicon lex; 19 | lexicon_test_fixture() : states("./share/pfp/states"), lex(states, 
"./share/pfp/words", "./share/pfp/sigs", "./share/pfp/word_state", "./share/pfp/sig_state") {} 20 | }; 21 | 22 | BOOST_AUTO_TEST_SUITE( lexicon_test ) 23 | 24 | BOOST_FIXTURE_TEST_CASE( test_nothing, lexicon_test_fixture ) 25 | { 26 | lex.score("", std::back_inserter(scores)); 27 | BOOST_CHECK_EQUAL(scores.size(), 18); 28 | } 29 | 30 | // known word, no smoothing 31 | BOOST_FIXTURE_TEST_CASE( test_The, lexicon_test_fixture ) 32 | { 33 | lex.score("The", std::back_inserter(scores)); 34 | BOOST_REQUIRE_EQUAL(scores.size(), 5); 35 | BOOST_CHECK_EQUAL( scores[0].first, 151 ); 36 | BOOST_CHECK_CLOSE( scores[0].second, -7.8324527f, 0.0001f ); 37 | BOOST_CHECK_EQUAL( scores[1].first, 303 ); 38 | BOOST_CHECK_CLOSE( scores[1].second, -5.7462034f, 0.0001f ); 39 | BOOST_CHECK_EQUAL( scores[2].first, 901 ); 40 | BOOST_CHECK_CLOSE( scores[2].second, -2.4547340f, 0.0001f ); 41 | BOOST_CHECK_EQUAL( scores[3].first, 2220 ); 42 | BOOST_CHECK_CLOSE( scores[3].second, -0.3566749f, 0.0001f ); 43 | BOOST_CHECK_EQUAL( scores[4].first, 5801 ); 44 | BOOST_CHECK_CLOSE( scores[4].second, -1.4586150f, 0.0001f ); 45 | } 46 | 47 | // known word, smoothing 48 | BOOST_FIXTURE_TEST_CASE( test_promotional, lexicon_test_fixture ) 49 | { 50 | lex.score("promotional", std::back_inserter(scores)); 51 | BOOST_REQUIRE_EQUAL(scores.size(), 131); 52 | BOOST_CHECK_EQUAL( scores[0].first, 13 ); 53 | BOOST_CHECK_CLOSE( scores[0].second, -16.316783905029297f, 0.0001f ); 54 | BOOST_CHECK_EQUAL( scores[23].first, 125); 55 | BOOST_CHECK_CLOSE( scores[23].second, -14.719925880432129f, 0.0001f ); 56 | BOOST_CHECK_EQUAL( scores[36].first, 169); 57 | BOOST_CHECK_CLOSE( scores[36].second, -8.140656471252441f, 0.0001f ); 58 | BOOST_CHECK_EQUAL( scores[130].first, 11337); 59 | BOOST_CHECK_CLOSE( scores[130].second, -12.640483856201172f, 0.0001f ); 60 | } 61 | 62 | // unknown word, found sig 63 | BOOST_FIXTURE_TEST_CASE( test_phalanxes, lexicon_test_fixture ) 64 | { 65 | lex.score("phalanxes", 
std::back_inserter(scores)); 66 | BOOST_REQUIRE_EQUAL(scores.size(), 18); 67 | BOOST_CHECK_EQUAL( scores[0].first, 77 ); 68 | BOOST_CHECK_CLOSE( scores[0].second, -11.172365188598633f, 0.0001f ); 69 | BOOST_CHECK_EQUAL( scores[4].first, 137 ); 70 | BOOST_CHECK_CLOSE( scores[4].second, -11.44262981414795f, 0.0001f ); 71 | BOOST_CHECK_EQUAL( scores[17].first, 833 ); 72 | BOOST_CHECK_CLOSE( scores[17].second, -20.891258239746094f, 0.0001f ); 73 | 74 | } 75 | 76 | BOOST_AUTO_TEST_SUITE_END() 77 | -------------------------------------------------------------------------------- /src/test/main.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MAIN 3 | #define BOOST_TEST_MODULE pfp_tests 4 | #include 5 | -------------------------------------------------------------------------------- /src/test/pcfg_parser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace com::wavii::pfp; 16 | 17 | BOOST_AUTO_TEST_SUITE( pcfg_parser_test ) 18 | 19 | BOOST_AUTO_TEST_CASE( test_pcfg_parser ) 20 | { 21 | state_list states("./share/pfp/states"); 22 | unary_grammar ug(states, "./share/pfp/unary_rules"); 23 | binary_grammar bg(states, "./share/pfp/binary_rules"); 24 | pcfg_parser pcfg(states, ug, bg); 25 | std::vector< std::vector< state_score_t > > sentence; 26 | 27 | { 28 | std::ifstream in("./etc/test/sample_input"); 29 | std::string line; 30 | float f; 31 | while (std::getline(in, line)) 32 | { 33 | std::istringstream iss(line); 34 | std::vector< state_score_t > word; 35 | state_score_t ss; 36 | while (iss >> ss.state >> f) 37 | { 38 | ss.score = static_cast(f * consts::score_resolution); 39 | word.push_back(ss); 40 | } 41 | std::sort(word.begin(), word.end()); 42 | 
sentence.push_back(word); 43 | } 44 | } 45 | 46 | node result; 47 | workspace ws(sentence.size(), states.size()); 48 | BOOST_REQUIRE_EQUAL( pcfg.parse(sentence, ws, result), true ); 49 | BOOST_REQUIRE_EQUAL( result.state, consts::goal_state ); 50 | BOOST_REQUIRE_EQUAL( result.children.size(), 2 ); 51 | BOOST_CHECK_EQUAL( result.children[1]->state, consts::boundary_state ); 52 | BOOST_CHECK_EQUAL( states[result.children[0]->state].tag, "S^ROOT-v" ); 53 | } 54 | 55 | BOOST_AUTO_TEST_SUITE_END() 56 | -------------------------------------------------------------------------------- /src/test/pfp.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | using namespace com::wavii::pfp; 23 | using namespace boost; 24 | namespace fs = boost::filesystem; 25 | 26 | template 27 | void load(T & obj, boost::filesystem::path p) 28 | { 29 | if (!fs::exists(p)) 30 | throw std::runtime_error("can't find " + p.string()); 31 | std::ifstream in(p.string().c_str()); 32 | obj.load(in); 33 | } 34 | 35 | BOOST_AUTO_TEST_SUITE( pfp_parser_test ) 36 | 37 | BOOST_AUTO_TEST_CASE( test_pfp_hash ) 38 | { 39 | tokenizer tokenizer; 40 | state_list states; 41 | lexicon lexicon(states); 42 | unary_grammar ug(states); 43 | binary_grammar bg(states); 44 | pcfg_parser pcfg(states, ug, bg); 45 | 46 | const std::string data_dir = "./share/pfp"; 47 | load(tokenizer, fs::path(data_dir) / "americanizations"); 48 | load(states, fs::path(data_dir) / "states"); 49 | { 50 | fs::path ps[] = { fs::path(data_dir) / "words", fs::path(data_dir) / "sigs", fs::path(data_dir) / "word_state", fs::path(data_dir) / "sig_state" }; 51 | std::ifstream ins[4]; 52 | for (int i = 0; i != 4; ++i) 53 | { 54 | if (!fs::exists(ps[i])) 55 | throw 
std::runtime_error("can't find " + ps[i].string()); 56 | ins[i].open(ps[i].string().c_str()); 57 | } 58 | lexicon.load(ins[0], ins[1], ins[2], ins[3]); 59 | } 60 | load(ug, fs::path(data_dir) / "unary_rules"); 61 | load(bg, fs::path(data_dir) / "binary_rules"); 62 | workspace w(45, states.size()); 63 | 64 | const std::string sentence= "Description This 2005 Nissan Altima available from Rama Auto Inc with Stock # 330051 is priced at $ 9500.00 ."; 65 | const std::string parsed = "(ROOT (S (S (VP (VBG Description) (NP (DT This) (CD 2005) (NNP Nissan) (NNP Altima)) (ADJP (JJ available) (PP (IN from) (NP (NP (NNP Rama) (NNP Auto) (NNP Inc)) (PP (IN with) (NP (NNP Stock)))))))) (NP (# #) (CD 330051)) (VP (VBZ is) (VP-VBN-v (VBN priced) (PP (IN at) (NP ($ $) (CD 9500.00))))) (. .)) )"; 66 | 67 | { 68 | std::vector< std::string > words; 69 | std::vector< std::pair< state_t, float > > state_weight; 70 | std::vector< std::vector< state_score_t > > sentence_f; 71 | node result; 72 | tokenizer.tokenize(sentence, words); 73 | for (std::vector< std::string >::const_iterator it = words.begin(); it != words.end(); ++it) 74 | { 75 | state_weight.clear(); lexicon.score(*it, std::back_inserter(state_weight)); 76 | sentence_f.push_back(std::vector< state_score_t >(state_weight.size())); 77 | // scale by score_resolution in case we are downcasting our weights 78 | for (size_t i = 0; i != state_weight.size(); ++i) 79 | sentence_f.back()[i] = state_score_t(state_weight[i].first, state_weight[i].second * consts::score_resolution); 80 | } 81 | // add the boundary symbol 82 | sentence_f.push_back( std::vector< state_score_t >(1, state_score_t(consts::boundary_state, 0.0f))); 83 | // and parse! 
84 | if (!pcfg.parse(sentence_f, w, result)) 85 | BOOST_FAIL("Parsing shouldn't fail!"); 86 | // stitch together the results 87 | std::ostringstream oss; 88 | std::vector< std::string >::iterator word_it = words.begin(); 89 | stitch(oss, result, word_it, states); 90 | 91 | BOOST_CHECK_EQUAL(oss.str(), parsed); 92 | } 93 | } 94 | 95 | BOOST_AUTO_TEST_SUITE_END() 96 | -------------------------------------------------------------------------------- /src/test/tokenizer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | using namespace com::wavii::pfp; 12 | 13 | struct tokenizer_test_fixture 14 | { 15 | std::vector words; 16 | tokenizer t; 17 | tokenizer_test_fixture() : t("./etc/americanizations") {} 18 | }; 19 | 20 | BOOST_AUTO_TEST_SUITE( tokenizer_test ) 21 | 22 | BOOST_FIXTURE_TEST_CASE( test_nothing, tokenizer_test_fixture ) 23 | { 24 | t.tokenize("", words); 25 | BOOST_CHECK_EQUAL(words.size(), 0); 26 | } 27 | 28 | BOOST_FIXTURE_TEST_CASE( test_sgml, tokenizer_test_fixture ) 29 | { 30 | t.tokenize("", words); 31 | BOOST_REQUIRE_EQUAL(words.size(), 1); 32 | BOOST_REQUIRE_EQUAL(words[0], ""); 33 | } 34 | 35 | BOOST_FIXTURE_TEST_CASE( test_mdash, tokenizer_test_fixture ) 36 | { 37 | t.tokenize("—", words); 38 | BOOST_REQUIRE_EQUAL(words.size(), 1); 39 | BOOST_REQUIRE_EQUAL(words[0], "--"); 40 | words.clear(); t.tokenize("\x96", words); 41 | BOOST_REQUIRE_EQUAL(words.size(), 1); 42 | BOOST_REQUIRE_EQUAL(words[0], "--"); 43 | words.clear(); t.tokenize("\xe2\x80\x93", words); 44 | BOOST_REQUIRE_EQUAL(words.size(), 1); 45 | BOOST_REQUIRE_EQUAL(words[0], "--"); 46 | } 47 | 48 | BOOST_FIXTURE_TEST_CASE( test_amp, tokenizer_test_fixture ) 49 | { 50 | t.tokenize("hey &Amp; dude", words); 51 | BOOST_REQUIRE_EQUAL(words.size(), 3); 52 | BOOST_REQUIRE_EQUAL(words[1], "&"); 53 | words.clear(); t.tokenize("b&b", words); 54 | 
BOOST_REQUIRE_EQUAL(words.size(), 1); 55 | BOOST_REQUIRE_EQUAL(words[0], "b&b"); 56 | words.clear(); t.tokenize("b&Amp;b", words); 57 | BOOST_REQUIRE_EQUAL(words.size(), 1); 58 | BOOST_REQUIRE_EQUAL(words[0], "b&b"); 59 | } 60 | 61 | BOOST_FIXTURE_TEST_CASE( test_cant, tokenizer_test_fixture ) 62 | { 63 | t.tokenize("I CAN'T GET NO", words); 64 | BOOST_REQUIRE_EQUAL(words.size(), 5); 65 | BOOST_REQUIRE_EQUAL(words[1], "CA"); 66 | BOOST_REQUIRE_EQUAL(words[2], "N'T"); 67 | } 68 | 69 | BOOST_FIXTURE_TEST_CASE( test_dangit, tokenizer_test_fixture ) 70 | { 71 | t.tokenize("dang'it", words); 72 | BOOST_REQUIRE_EQUAL(words.size(), 3); 73 | BOOST_REQUIRE_EQUAL(words[1], "`"); 74 | } 75 | 76 | BOOST_FIXTURE_TEST_CASE( test_americanize, tokenizer_test_fixture ) 77 | { 78 | t.tokenize("haematologists devour the colour superleukaemia.", words); 79 | BOOST_REQUIRE_EQUAL(words.size(), 6); 80 | BOOST_REQUIRE_EQUAL(words[0], "hematologists"); 81 | BOOST_REQUIRE_EQUAL(words[1], "devour"); 82 | BOOST_REQUIRE_EQUAL(words[3], "color"); 83 | BOOST_REQUIRE_EQUAL(words[4], "superleukemia"); 84 | } 85 | 86 | BOOST_FIXTURE_TEST_CASE( test_escape, tokenizer_test_fixture ) 87 | { 88 | t.tokenize("this / that", words); 89 | BOOST_REQUIRE_EQUAL(words.size(), 3); 90 | BOOST_REQUIRE_EQUAL(words[1], "\\/"); 91 | words.clear(); t.tokenize("this \\/ that", words); 92 | BOOST_REQUIRE_EQUAL(words.size(), 3); 93 | BOOST_REQUIRE_EQUAL(words[1], "\\/"); 94 | } 95 | 96 | BOOST_FIXTURE_TEST_CASE( test_period, tokenizer_test_fixture ) 97 | { 98 | t.tokenize("Bob, Inc. bought a monkey.", words); 99 | BOOST_REQUIRE_EQUAL(words.size(), 7); 100 | BOOST_REQUIRE_EQUAL(words[2], "Inc."); 101 | BOOST_REQUIRE_EQUAL(words[3], "bought"); 102 | // TODO: get these tests to run when I can figure out how to get flex/jflex to play with each other regarding case insensitivity 103 | /*words.clear(); t.tokenize("Bob, Inc. 
Bought a monkey.", words); 104 | BOOST_REQUIRE_EQUAL(words.size(), 8); 105 | BOOST_REQUIRE_EQUAL(words[2], "Inc."); 106 | BOOST_REQUIRE_EQUAL(words[3], ".");*/ 107 | } 108 | 109 | BOOST_AUTO_TEST_SUITE_END() 110 | --------------------------------------------------------------------------------