├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── ROUGE-1.5.5 ├── README.txt ├── RELEASE-NOTE.txt ├── ROUGE-1.5.5.pl ├── XML │ ├── DOM.pm │ ├── DOM │ │ ├── AttDef.pod │ │ ├── AttlistDecl.pod │ │ ├── Attr.pod │ │ ├── CDATASection.pod │ │ ├── CharacterData.pod │ │ ├── Comment.pod │ │ ├── DOMException.pm │ │ ├── DOMImplementation.pod │ │ ├── Document.pod │ │ ├── DocumentFragment.pod │ │ ├── DocumentType.pod │ │ ├── Element.pod │ │ ├── ElementDecl.pod │ │ ├── Entity.pod │ │ ├── EntityReference.pod │ │ ├── NamedNodeMap.pm │ │ ├── NamedNodeMap.pod │ │ ├── Node.pod │ │ ├── NodeList.pm │ │ ├── NodeList.pod │ │ ├── Notation.pod │ │ ├── Parser.pod │ │ ├── PerlSAX.pm │ │ ├── ProcessingInstruction.pod │ │ ├── Text.pod │ │ └── XMLDecl.pod │ ├── Handler │ │ └── BuildDOM.pm │ └── RegExp.pm ├── data │ ├── WordNet-2.0-Exceptions │ │ ├── WordNet-2.0.exc.db │ │ ├── adj.exc │ │ ├── adv.exc │ │ ├── buildExeptionDB.pl │ │ ├── noun.exc │ │ └── verb.exc │ ├── WordNet-2.0.exc.db │ └── smart_common_words.txt ├── runROUGE-test.pl └── test.xml ├── calRouge.py ├── example ├── candidate.txt └── reference.txt └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # file 2 | .DS_Store 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ROUGE for Multilingual Summarization 2 | 3 | Since the original summarization metric [ROUGE](https://aclanthology.org/W04-1013/) was designed only for English, we follow the method of [Hu et al.](https://aclanthology.org/D15-1229.pdf) and map words in other languages to numbers. 4 | 5 | Languages written without spaces (e.g., Chinese, Japanese) are segmented into characters, and other languages are split on spaces. In the example below, the Chinese text is split into characters, while the English words and numbers are split on spaces. 6 | 7 | ``` 8 | [Input] Surface Phone将装载Windows 10 (The Surface Phone will be loaded with Windows 10) 9 | [Segmentation] surface/phone/将/装/载/windows/10 10 | ``` 11 | 12 | ## Install 13 | 14 | ``` shell 15 | # install dependencies 16 | pip install -r requirements.txt 17 | 18 | # export environment paths 19 | export PYROUGE_HOME_DIR=$(pwd)/ROUGE-1.5.5 20 | export PYROUGE_TEMP_PATH=. 
21 | 22 | # add permission 23 | chmod +x $PYROUGE_HOME_DIR/ROUGE-1.5.5.pl 24 | ``` 25 | 26 | ## Usage 27 | Each line in the candidate/reference file should be a summary in the language given by '-l', consisting of sentences split by the delimiter given by '-d'. '-t' indicates whether the text needs to be tokenized. 28 | 29 | ``` 30 | python3 calRouge.py -c example/candidate.txt -r example/reference.txt -l zh -d "" -t 31 | ``` 32 | 33 | ## ROUGE 34 | If you have problems using ROUGE, check the complete installation commands below. 35 | 36 | ``` shell 37 | sudo apt-get install libxml-perl libxml-dom-perl 38 | pip install git+git://github.com/bheinzerling/pyrouge 39 | export PYROUGE_HOME_DIR=the/path/to/RELEASE-1.5.5 40 | pyrouge_set_rouge_path $PYROUGE_HOME_DIR 41 | chmod +x $PYROUGE_HOME_DIR/ROUGE-1.5.5.pl 42 | ``` 43 | 44 | We have put RELEASE-1.5.5 in the directory 'ROUGE-1.5.5'. You can also download it yourself from [here](https://github.com/andersjo/pyrouge/tree/master/tools/ROUGE-1.5.5). Remember to build WordNet 2.0 instead of 1.6 in RELEASE-1.5.5/data: 45 | 46 | ```shell 47 | # remove old files 48 | rm $PYROUGE_HOME_DIR/data/WordNet-2.0-Exceptions/WordNet-2.0.exc.db 49 | rm $PYROUGE_HOME_DIR/data/WordNet-2.0.exc.db 50 | 51 | # create new files by yourself 52 | cd $PYROUGE_HOME_DIR/data/WordNet-2.0-Exceptions/ 53 | perl ./buildExeptionDB.pl . exc WordNet-2.0.exc.db 54 | cd ../ 55 | ln -s WordNet-2.0-Exceptions/WordNet-2.0.exc.db WordNet-2.0.exc.db 56 | ``` 57 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/README.txt: -------------------------------------------------------------------------------- 1 | A Brief Introduction of the ROUGE Summary Evaluation Package 2 | by Chin-Yew LIN 3 | University of Southern California/Information Sciences Institute 4 | 05/26/2005 5 | 6 | <> 7 | 8 | (1) Correct the resampling routine which ignores the last evaluation 9 | item in the evaluation list. 
Therefore, the average scores reported 10 | by ROUGE is only based on the first N-1 evaluation items. 11 | Thanks Barry Schiffman at Columbia University to report this bug. 12 | This bug only affects ROUGE-1.5.X. For pre-1.5 ROUGE, it only affects 13 | the computation of confidence interval (CI) estimation, i.e. CI is only 14 | estimated by the first N-1 evaluation items, but it *does not* affect 15 | average scores. 16 | (2) Correct stemming on multi-token BE heads and modifiers. 17 | Previously, only single token heads and modifiers were assumed. 18 | (3) Change read_text and read_text_LCS functions to read exact words or 19 | bytes required by users. Previous versions carry out whitespace 20 | compression and other string clear up actions before enforce the length 21 | limit. 22 | (4) Add the capability to score summaries in Basic Element (BE) 23 | format by using option "-3", standing for BE triple. There are 6 24 | different modes in BE scoring. We suggest using *"-3 HMR"* on BEs 25 | extracted from Minipar parse trees based on our correlation analysis 26 | of BE-based scoring vs. human judgements on DUC 2002 & 2003 automatic 27 | summaries. 28 | (5) ROUGE now generates three scores (recall, precision and F-measure) 29 | for each evaluation. Previously, only one score is generated 30 | (recall). Precision and F-measure scores are useful when the target 31 | summary length is not enforced. Only recall scores were necessary since 32 | DUC guideline dictated the limit on summary length. For comparison to 33 | previous DUC results, please use the recall scores. The default alpha 34 | weighting for computing F-measure is 0.5. Users can specify a 35 | particular alpha weighting that fits their application scenario using 36 | option "-p alpha-weight". Where *alpha-weight* is a number between 0 37 | and 1 inclusively. 38 | (6) Pre-1.5 version of ROUGE used model average to compute the overall 39 | ROUGE scores when there are multiple references. 
Starting from v1.5+, 40 | ROUGE provides an option to use the best matching score among the 41 | references as the final score. The model average option is specified 42 | using "-f A" (for Average) and the best model option is specified 43 | using "-f B" (for the Best). The "-f A" option is better when use 44 | ROUGE in summarization evaluations; while "-f B" option is better when 45 | use ROUGE in machine translation (MT) and definition 46 | question-answering (DQA) evaluations since in a typical MT or DQA 47 | evaluation scenario matching a single reference translation or 48 | definition answer is sufficient. However, it is very likely that 49 | multiple different but equally good summaries exist in summarization 50 | evaluation. 51 | (7) ROUGE v1.5+ also provides the option to specify whether model unit 52 | level average will be used (macro-average, i.e. treating every model 53 | unit equally) or token level average will be used (micro-average, 54 | i.e. treating every token equally). In summarization evaluation, we 55 | suggest using model unit level average and this is the default setting 56 | in ROUGE. To specify other average mode, use "-t 0" (default) for 57 | model unit level average, "-t 1" for token level average and "-t 2" 58 | for output raw token counts in models, peers, and matches. 59 | (8) ROUGE now offers the option to use file list as the configuration 60 | file. The input format of the summary files are specified using the 61 | "-z INPUT-FORMAT" option. The INPUT-FORMAT can be SEE, SPL, ISI or 62 | SIMPLE. When "-z" is specified, ROUGE assumed that the ROUGE 63 | evaluation configuration file is a file list with each evaluation 64 | instance per line in the following format: 65 | 66 | peer_path1 model_path1 model_path2 ... model_pathN 67 | peer_path2 model_path1 model_path2 ... model_pathN 68 | ... 69 | peer_pathM model_path1 model_path2 ... 
model_pathN 70 | 71 | The first file path is the peer summary (system summary) and it 72 | follows with a list of model summaries (reference summaries) separated 73 | by white spaces (spaces or tabs). 74 | (9) When stemming is applied, a new WordNet exception database based 75 | on WordNet 2.0 is used. The new database is included in the data 76 | directory. 77 | 78 | <> 79 | 80 | (1) Use "-h" option to see a list of options. 81 | Summary: 82 | Usage: ROUGE-1.5.4.pl 83 | [-a (evaluate all systems)] 84 | [-c cf] 85 | [-d (print per evaluation scores)] 86 | [-e ROUGE_EVAL_HOME] 87 | [-h (usage)] 88 | [-b n-bytes|-l n-words] 89 | [-m (use Porter stemmer)] 90 | [-n max-ngram] 91 | [-s (remove stopwords)] 92 | [-r number-of-samples (for resampling)] 93 | [-2 max-gap-length (if < 0 then no gap length limit)] 94 | [-3 ] 95 | [-u (include unigram in skip-bigram) default no)] 96 | [-U (same as -u but also compute regular skip-bigram)] 97 | [-w weight (weighting factor for WLCS)] 98 | [-v (verbose)] 99 | [-x (do not calculate ROUGE-L)] 100 | [-f A|B (scoring formula)] 101 | [-p alpha (0 <= alpha <=1)] 102 | [-t 0|1|2 (count by token instead of sentence)] 103 | [-z ] 104 | [] 105 | 106 | ROUGE-eval-config-file: Specify the evaluation setup. Three files come with the ROUGE 107 | evaluation package, i.e. ROUGE-test.xml, verify.xml, and verify-spl.xml are 108 | good examples. 109 | 110 | systemID: Specify which system in the ROUGE-eval-config-file to perform the evaluation. 111 | If '-a' option is used, then all systems are evaluated and users do not need to 112 | provide this argument. 113 | 114 | Default: 115 | When running ROUGE without supplying any options (except -a), the following defaults are used: 116 | (1) ROUGE-L is computed; 117 | (2) 95% confidence interval; 118 | (3) No stemming; 119 | (4) Stopwords are inlcuded in the calculations; 120 | (5) ROUGE looks for its data directory first through the ROUGE_EVAL_HOME environment variable. 
If 121 | it is not set, the current directory is used. 122 | (6) Use model average scoring formula. 123 | (7) Assign equal importance of ROUGE recall and precision in computing ROUGE f-measure, i.e. alpha=0.5. 124 | (8) Compute average ROUGE by averaging sentence (unit) ROUGE scores. 125 | Options: 126 | -2: Compute skip bigram (ROGUE-S) co-occurrence, also specify the maximum gap length between two words (skip-bigram) 127 | -u: Compute skip bigram as -2 but include unigram, i.e. treat unigram as "start-sentence-symbol unigram"; -2 has to be specified. 128 | -3: Compute BE score. 129 | H -> head only scoring (does not applied to Minipar-based BEs). 130 | HM -> head and modifier pair scoring. 131 | HMR -> head, modifier and relation triple scoring. 132 | HM1 -> H and HM scoring (same as HM for Minipar-based BEs). 133 | HMR1 -> HM and HMR scoring (same as HMR for Minipar-based BEs). 134 | HMR2 -> H, HM and HMR scoring (same as HMR for Minipar-based BEs). 135 | -a: Evaluate all systems specified in the ROUGE-eval-config-file. 136 | -c: Specify CF\% (0 <= CF <= 100) confidence interval to compute. The default is 95\% (i.e. CF=95). 137 | -d: Print per evaluation average score for each system. 138 | -e: Specify ROUGE_EVAL_HOME directory where the ROUGE data files can be found. 139 | This will overwrite the ROUGE_EVAL_HOME specified in the environment variable. 140 | -f: Select scoring formula: 'A' => model average; 'B' => best model 141 | -h: Print usage information. 142 | -b: Only use the first n bytes in the system/peer summary for the evaluation. 143 | -l: Only use the first n words in the system/peer summary for the evaluation. 144 | -m: Stem both model and system summaries using Porter stemmer before computing various statistics. 145 | -n: Compute ROUGE-N up to max-ngram length will be computed. 146 | -p: Relative importance of recall and precision ROUGE scores. Alpha -> 1 favors precision, Alpha -> 0 favors recall. 
147 | -s: Remove stopwords in model and system summaries before computing various statistics. 148 | -t: Compute average ROUGE by averaging over the whole test corpus instead of sentences (units). 149 | 0: use sentence as counting unit, 1: use token as counting unit, 2: same as 1 but output raw counts 150 | instead of precision, recall, and f-measure scores. 2 is useful when computation of the final 151 | precision, recall, and f-measure scores will be conducted later. 152 | -r: Specify the number of sampling points in bootstrap resampling (default is 1000). 153 | A smaller number will speed up the evaluation but give a less reliable confidence interval. 154 | -w: Compute ROUGE-W that gives consecutive matches of length L in an LCS a weight of 'L^weight' instead of just 'L' as in LCS. 155 | Typically this is set to 1.2 or another number greater than 1. 156 | -v: Print debugging information for diagnostic purposes. 157 | -x: Do not calculate ROUGE-L. 158 | -z: ROUGE-eval-config-file is a list of peer-model pairs, one per line, in the specified format (SEE|SPL|ISI|SIMPLE). 159 | 160 | (2) Please read RELEASE-NOTE.txt for information about updates from previous versions. 161 | 162 | (3) The following files that come with this package in the "sample-output" 163 | directory are the expected output of the evaluation files in the 164 | "sample-test" directory. 
165 | (a) use "data" as ROUGE_EVAL_HOME, compute 95% confidence interval, 166 | compute ROUGE-L (longest common subsequence, default), 167 | compute ROUGE-S* (skip bigram) without gap length limit, 168 | compute also ROUGE-SU* (skip bigram with unigram), 169 | run resampling 1000 times, 170 | compute ROUGE-N (N=1 to 4), 171 | compute ROUGE-W (weight = 1.2), and 172 | compute these ROUGE scores for all systems: 173 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-a.out 174 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -a ROUGE-test.xml 175 | 176 | (b) Same as (a) but apply Porter's stemmer on the input: 177 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-a-m.out 178 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -m -a ROUGE-test.xml 179 | 180 | (c) Same as (b) but apply also a stopword list on the input: 181 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-a-m-s.out 182 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -m -s -a ROUGE-test.xml 183 | 184 | (d) Same as (a) but apply a summary length limit of 10 words: 185 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-l10-a.out 186 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -l 10 -a ROUGE-test.xml 187 | 188 | (e) Same as (d) but apply Porter's stemmer on the input: 189 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-l10-a-m.out 190 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -l 10 -m -a ROUGE-test.xml 191 | 192 | (f) Same as (e) but apply also a stopword list on the input: 193 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-l10-a-m-s.out 194 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -l 10 -m -s -a ROUGE-test.xml 195 | 196 | (g) Same as (a) but apply a summary lenght limit of 75 bytes: 197 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-b75-a.out 198 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -b 75 -a ROUGE-test.xml 199 | 200 | (h) Same as (g) but apply Porter's stemmer on the input: 201 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-b75-a-m.out 202 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 
-U -r 1000 -n 4 -w 1.2 -b 75 -m -a ROUGE-test.xml 203 | 204 | (i) Same as (h) but apply also a stopword list on the input: 205 | ROUGE-test-c95-2-1-U-r1000-n4-w1.2-b75-a-m-s.out 206 | > ROUGE-1.5.4.pl -e data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -b 75 -m -s -a ROUGE-test.xml 207 | 208 | Sample DUC2002 data (1 system and 1 model only per DUC 2002 topic), their BE and 209 | ROUGE evaluation configuration file in XML and file list format, 210 | and their expected output are also included for your reference. 211 | 212 | (a) Use DUC2002-BE-F.in.26.lst, a BE files list, as ROUGE the 213 | configuration file: 214 | command> ROUGE-1.5.4.pl -3 HM -z SIMPLE DUC2002-BE-F.in.26.lst 26 215 | output: DUC2002-BE-F.in.26.lst.out 216 | (b) Use DUC2002-BE-F.in.26.simple.xml as ROUGE XML evaluation configuration file: 217 | command> ROUGE-1.5.4.pl -3 HM DUC2002-BE-F.in.26.simple.xml 26 218 | output: DUC2002-BE-F.in.26.simple.out 219 | (c) Use DUC2002-BE-L.in.26.lst, a BE files list, as ROUGE the 220 | configuration file: 221 | command> ROUGE-1.5.4.pl -3 HM -z SIMPLE DUC2002-BE-L.in.26.lst 26 222 | output: DUC2002-BE-L.in.26.lst.out 223 | (d) Use DUC2002-BE-L.in.26.simple.xml as ROUGE XML evaluation configuration file: 224 | command> ROUGE-1.5.4.pl -3 HM DUC2002-BE-L.in.26.simple.xml 26 225 | output: DUC2002-BE-L.in.26.simple.out 226 | (e) Use DUC2002-ROUGE.in.26.spl.lst, a BE files list, as ROUGE the 227 | configuration file: 228 | command> ROUGE-1.5.4.pl -n 4 -z SPL DUC2002-ROUGE.in.26.spl.lst 26 229 | output: DUC2002-ROUGE.in.26.spl.lst.out 230 | (f) Use DUC2002-ROUGE.in.26.spl.xml as ROUGE XML evaluation configuration file: 231 | command> ROUGE-1.5.4.pl -n 4 DUC2002-ROUGE.in.26.spl.xml 26 232 | output: DUC2002-ROUGE.in.26.spl.out 233 | 234 | <> 235 | 236 | (1) You need to have DB_File installed. 
If the Perl script complains 237 | about database version incompatibility, you can create a new 238 | WordNet-2.0.exc.db by running the buildExeptionDB.pl script in 239 | the "data/WordNet-2.0-Exceptions" subdirectory. 240 | (2) You also need to install XML::DOM from http://www.cpan.org. 241 | Direct link: http://www.cpan.org/modules/by-module/XML/XML-DOM-1.43.tar.gz. 242 | You might need to install extra Perl modules that are required by 243 | XML::DOM. 244 | (3) Set up an environment variable ROUGE_EVAL_HOME that points to the 245 | "data" subdirectory. For example, if your "data" subdirectory 246 | is located at "/usr/local/ROUGE-1.5.4/data" then you can set up 247 | the ROUGE_EVAL_HOME as follows: 248 | (a) Using csh or tcsh: 249 | $command_prompt>setenv ROUGE_EVAL_HOME /usr/local/ROUGE-1.5.4/data 250 | (b) Using bash 251 | $command_prompt>ROUGE_EVAL_HOME=/usr/local/ROUGE-1.5.4/data 252 | $command_prompt>export ROUGE_EVAL_HOME 253 | (4) Running ROUGE-1.5.4.pl without supplying any arguments will give 254 | you a description of how to use the ROUGE script. 255 | (5) Please look into the included ROUGE-test.xml, verify.xml, and 256 | verify-spl.xml evaluation configuration files for preparing your 257 | own evaluation setup. More detailed description will be provided 258 | later. ROUGE-test.xml and verify.xml specify that the input from 259 | systems and references is in SEE (Summary Evaluation Environment) 260 | format (http://www.isi.edu/~cyl/SEE); while verify-spl.xml specifies 261 | that inputs are in sentence per line format. 262 | 263 | <> 264 | 265 | (1) Please look into the "docs" directory for more information about 266 | ROUGE. 267 | (2) ROUGE-Note-v1.4.2.pdf explains how ROUGE works. It was published in 268 | Proceedings of the Workshop on Text Summarization Branches Out 269 | (WAS 2004), Barcelona, Spain, 2004. 270 | (3) NAACL2003.pdf presents the initial idea of applying n-gram 271 | co-occurrence statistics in automatic evaluation of 272 | summarization. 
It was published in Proceedings of the 2003 Language 273 | Technology Conference (HLT-NAACL 2003), Edmonton, Canada, 2003. 274 | (4) NTCIR2004.pdf discusses the effect of sample size on the 275 | reliability of automatic evaluation results using data in the past 276 | Document Understanding Conference (DUC) as examples. It was 277 | published in Proceedings of the 4th NTCIR Meeting, Tokyo, Japan, 2004. 278 | (5) ACL2004.pdf shows how ROUGE can be applied to automatic evaluation 279 | of machine translation. It was published in Proceedings of the 42nd 280 | Annual Meeting of the Association for Computational Linguistics 281 | (ACL 2004), Barcelona, Spain, 2004. 282 | (6) COLING2004.pdf proposes a new meta-evaluation framework, ORANGE, for 283 | automatic evaluation of automatic evaluation methods. We showed 284 | that ROUGE-S and ROUGE-L were significantly better than BLEU, 285 | NIST, WER, and PER automatic MT evaluation methods under the 286 | ORANGE framework. It was published in Proceedings of the 20th 287 | International Conference on Computational Linguistics (COLING 2004), 288 | Geneva, Switzerland, 2004. 289 | (7) For information about BE, please go to http://www.isi.edu/~cyl/BE. 290 | 291 | <> 292 | 293 | Thanks for using the ROUGE evaluation package. If you have any 294 | questions or comments, please send them to cyl@isi.edu. I will do my 295 | best to answer your questions. 296 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/RELEASE-NOTE.txt: -------------------------------------------------------------------------------- 1 | # Revision Note: 05/26/2005, Chin-Yew LIN 2 | # 1.5.5 3 | # (1) Correct stemming on multi-token BE heads and modifiers. 4 | # Previously, only single token heads and modifiers were assumed. 5 | # (2) Correct the resampling routine which ignores the last evaluation 6 | # item in the evaluation list. 
Therefore, the average scores reported 7 | # by ROUGE are only based on the first N-1 evaluation items. 8 | # Thanks to Barry Schiffman at Columbia University for reporting this bug. 9 | # This bug only affects ROUGE-1.5.X. For pre-1.5 ROUGE, it only affects 10 | # the computation of confidence interval (CI) estimation, i.e. CI is only 11 | # estimated by the first N-1 evaluation items, but it *does not* affect 12 | # average scores. 13 | # (3) Change read_text and read_text_LCS functions to read exact words or 14 | # bytes required by users. Previous versions carry out whitespace 15 | # compression and other string clear up actions before enforcing the length 16 | # limit. 17 | # 1.5.4.1 18 | # (1) Minor description change about "-t 0" option. 19 | # 1.5.4 20 | # (1) Add easy evaluation mode for single reference evaluations with -z 21 | # option. 22 | # 1.5.3 23 | # (1) Add option to compute ROUGE score based on SIMPLE BE format. Given 24 | # a set of peer and model summary file in BE format with appropriate 25 | # options, ROUGE will compute matching scores based on BE lexical 26 | # matches. 27 | # There are 6 options: 28 | # 1. H : Head only match. This is similar to unigram match but 29 | # only BE Head is used in matching. BEs generated by 30 | # Minipar-based breaker do not include head-only BEs, 31 | # therefore, the score will always be zero. Use HM or HMR 32 | # options instead. 33 | # 2. HM : Head and modifier match. This is similar to bigram or 34 | # skip bigram but it's head-modifier bigram match based on 35 | # parse result. Only BE triples with non-NIL modifier are 36 | # included in the matching. 37 | # 3. HMR : Head, modifier, and relation match. This is similar to 38 | # trigram match but it's head-modifier-relation trigram 39 | # match based on parse result. Only BE triples with non-NIL 40 | # relation are included in the matching. 41 | # 4. HM1 : This is a combination of H and HM.
It is similar to unigram + 42 | # bigram or skip bigram with unigram match but it's 43 | # head-modifier bigram match based on parse result. 44 | # In this case, the modifier field in a BE can be "NIL". 45 | # 5. HMR1 : This is a combination of HM and HMR. It is similar to 46 | # trigram match but it's head-modifier-relation trigram 47 | # match based on parse result. In this case, the relation 48 | # field of the BE can be "NIL". 49 | # 6. HMR2 : This is a combination of H, HM and HMR. It is similar to 50 | # trigram match but it's head-modifier-relation trigram 51 | # match based on parse result. In this case, the modifier and 52 | # relation fields of the BE can both be "NIL". 53 | # 1.5.2 54 | # (1) Add option to compute ROUGE score by token using the whole corpus 55 | # as average unit instead of individual sentences. Previous versions of 56 | # ROUGE use sentence (or unit) boundary to break counting unit and take 57 | # the average score from the counting unit as the final score. 58 | # Using the whole corpus as one single counting unit can potentially 59 | # improve the reliability of the final score that treats each token as 60 | # equally important; while the previous approach considers each sentence as 61 | # equally important that ignores the length effect of each individual 62 | # sentence (i.e. long sentences contribute equal weight to the final 63 | # score as short sentences.) 64 | # +v1.2 provides a choice of these two counting modes that users can 65 | # choose the one that fits their scenarios. 66 | # 1.5.1 67 | # (1) Add precision oriented measure and f-measure to deal with different lengths 68 | # in candidates and references. Importance between recall and precision can 69 | # be controlled by 'alpha' parameter: 70 | # alpha -> 0: recall is more important 71 | # alpha -> 1: precision is more important 72 | # Following Chapter 7 in C.J. van Rijsbergen's "Information Retrieval".
73 | # http://www.dcs.gla.ac.uk/Keith/Chapter.7/Ch.7.html 74 | # F = 1/(alpha * (1/P) + (1 - alpha) * (1/R)) ;;; weighted harmonic mean 75 | # 1.4.2 76 | # (1) Enforce length limit at the time when summary text is read. Previously (before 77 | # and including v1.4.1), length limit was enforced at tokenization time. 78 | # 1.4.1 79 | # (1) Fix potential over counting in ROUGE-L and ROUGE-W 80 | # In previous versions (i.e. 1.4 and older), LCS hit is computed 81 | # by summing union hit over all model sentences. Each model sentence 82 | # is compared with all peer sentences and marks the union LCS. The 83 | # length of the union LCS is the hit of that model sentence. The 84 | # final hit is then the sum over all model union LCS hits. This potentially 85 | # would over count a peer sentence which has already been marked as contributing 86 | # to some other model sentence. Therefore, double counting results. 87 | # This is seen in evaluation where ROUGE-L score is higher than ROUGE-1 and 88 | # this is not correct. 89 | # ROUGEeval-1.4.1.pl fixes this by adding a clip function to prevent 90 | # double counting. 91 | # 1.4 92 | # (1) Remove internal Jackknifing procedure: 93 | # Now the ROUGE script will use all the references listed in the 94 | # section in each section and no 95 | # automatic Jackknifing is performed. 96 | # If Jackknifing procedure is required when comparing human and system 97 | # performance, then users have to setup the procedure in the ROUGE 98 | # evaluation configuration script as follows: 99 | # For example, to evaluate system X with 4 references R1, R2, R3, and R4. 100 | # We do the following computation: 101 | # 102 | # for system: and for comparable human: 103 | # s1 = X vs. R1, R2, R3 h1 = R4 vs. R1, R2, R3 104 | # s2 = X vs. R1, R3, R4 h2 = R2 vs. R1, R3, R4 105 | # s3 = X vs. R1, R2, R4 h3 = R3 vs. R1, R2, R4 106 | # s4 = X vs. R2, R3, R4 h4 = R1 vs.
R2, R3, R4 107 | # 108 | # Average system score for X = (s1+s2+s3+s4)/4 and for human = (h1+h2+h3+h4)/4 109 | # Implementation of this in a ROUGE evaluation configuration script is as follows: 110 | # Instead of writing all references in a evaluation section as below: 111 | # 112 | # ... 113 | # 114 | #

systemX 115 | # 116 | # 117 | # R1 118 | # R2 119 | # R3 120 | # R4 121 | # 122 | # 123 | # we write the following: 124 | # 125 | # 126 | #

systemX 127 | # 128 | # 129 | # R2 130 | # R3 131 | # R4 132 | # 133 | # 134 | # 135 | # 136 | #

systemX 137 | # 138 | # 139 | # R1 140 | # R3 141 | # R4 142 | # 143 | # 144 | # 145 | # 146 | #

systemX 147 | # 148 | # 149 | # R1 150 | # R2 151 | # R4 152 | # 153 | # 154 | # 155 | # 156 | #

systemX 157 | # 158 | # 159 | # R1 160 | # R2 161 | # R3 162 | # 163 | # 164 | # 165 | # In this case, the system and human numbers are comparable. 166 | # ROUGE as it is implemented for summarization evaluation is a recall-based metric. 167 | # As we increase the number of references, we are increasing the number of 168 | # count units (n-gram or skip-bigram or LCSes) in the target pool (i.e. 169 | # the number that ends up in the denominator of any ROUGE formula is larger). 170 | # Therefore, a candidate summary has more chance to hit but it also has to 171 | # hit more. In the end, this means lower absolute ROUGE scores when more 172 | # references are used and scores obtained using different sets of references should not 173 | # be compared to each other. There is no normalization mechanism in ROUGE 174 | # to properly adjust difference due to different number of references used. 175 | # 176 | # In the ROUGE implementations before v1.4 when there are N models provided for 177 | # evaluating system X in the ROUGE evaluation script, ROUGE does the 178 | # following: 179 | # (1) s1 = X vs. R2, R3, R4, ..., RN 180 | # (2) s2 = X vs. R1, R3, R4, ..., RN 181 | # (3) s3 = X vs. R1, R2, R4, ..., RN 182 | # (4) s4 = X vs. R1, R2, R3, ..., RN 183 | # (5) ... 184 | # (6) sN= X vs. R1, R2, R3, ..., RN-1 185 | # And the final ROUGE score is computed by taking the average of (s1, s2, s3, 186 | # s4, ..., sN). When we provide only three references for evaluation of a 187 | # human summarizer, ROUGE does the same thing but using 2 out of 3 188 | # references, gets three numbers, and then takes the average as the final 189 | # score. Now ROUGE (after v1.4) will use all references without this 190 | # internal Jackknifing procedure. The speed of the evaluation should improve 191 | # a lot, since only one set instead of four sets of computation will be 192 | # conducted.
193 | # 1.3 194 | # (1) Add skip bigram 195 | # (2) Add an option to specify the number of sampling points (default is 1000) 196 | # 1.2.3 197 | # (1) Correct the environment variable option: -e. Now users can specify environment 198 | # variable ROUGE_EVAL_HOME using the "-e" option; previously this option was 199 | # not active. Thanks to Zhouyan Li of Concordia University, Canada for pointing this 200 | # out. 201 | # 1.2.2 202 | # (1) Correct confidence interval calculation for median, maximum, and minimum. 203 | # Line 390. 204 | # 1.2.1 205 | # (1) Add sentence per line input format. See files in Verify-SPL for examples. 206 | # (2) Streamline command line arguments. 207 | # (3) Use bootstrap resampling to estimate confidence intervals instead of using t-test 208 | # or z-test which assume a normal distribution. 209 | # (4) Add LCS (longest common subsequence) evaluation method. 210 | # (5) Add WLCS (weighted longest common subsequence) evaluation method. 211 | # (6) Add length cutoff in bytes. 212 | # (7) Add an option to specify the longest ngram to compute. The default is 4. 213 | # 1.2 214 | # (1) Change zero condition check in subroutine &computeNGramScores when 215 | # computing $gram1Score from 216 | # if($totalGram2Count!=0) to 217 | # if($totalGram1Count!=0) 218 | # Thanks to Ken Litkowski for this bug report. 219 | # This original script will set gram1Score to zero if there are no 220 | # bigram matches. This should rarely have a significant effect on the final score 221 | # since (a) there are bigram matches most of the time; (b) the computation 222 | # of gram1Score is using Jackknifing procedure. However, this definitely 223 | # did not compute the correct $gram1Score when there are no bigram matches. 224 | # Therefore, users of version 1.1 should definitely upgrade to a newer 225 | # version of the script that does not contain this bug.
226 | # Note: To use this script, two additional data files are needed: 227 | # (1) smart_common_words.txt - contains stopword list from SMART IR engine 228 | # (2) WordNet-1.6.exc.db - WordNet 1.6 exception inflexion database 229 | # These two files have to be put in a directory pointed to by the environment 230 | # variable: "ROUGE_EVAL_HOME". 231 | # If environment variable ROUGE_EVAL_HOME does not exist, this script will 232 | # assume it can find these two database files in the current directory. 233 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/AttDef.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::AttDef - A single XML attribute definition in an ATTLIST in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::AttDef extends L, but is not part of the DOM Level 1 8 | specification. 9 | 10 | Each object of this class represents one attribute definition in an AttlistDecl. 11 | 12 | =head2 METHODS 13 | 14 | =over 4 15 | 16 | =item getName 17 | 18 | Returns the attribute name. 19 | 20 | =item getDefault 21 | 22 | Returns the default value, or undef. 23 | 24 | =item isFixed 25 | 26 | Whether the attribute value is fixed (see #FIXED keyword.) 27 | 28 | =item isRequired 29 | 30 | Whether the attribute value is required (see #REQUIRED keyword.) 31 | 32 | =item isImplied 33 | 34 | Whether the attribute value is implied (see #IMPLIED keyword.) 35 | 36 | =back 37 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/AttlistDecl.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::AttlistDecl - An XML ATTLIST declaration in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::AttlistDecl extends L but is not part of the 8 | DOM Level 1 specification. 9 | 10 | This node represents an ATTLIST declaration, e.g.
11 | 12 | 17 | 18 | Each attribute definition is stored in a separate AttDef node. The AttDef nodes can 19 | be retrieved with getAttDef and added with addAttDef. 20 | (The AttDef nodes are stored in a NamedNodeMap internally.) 21 | 22 | =head2 METHODS 23 | 24 | =over 4 25 | 26 | =item getName 27 | 28 | Returns the Element tagName. 29 | 30 | =item getAttDef (attrName) 31 | 32 | Returns the AttDef node for the attribute with the specified name. 33 | 34 | =item addAttDef (attrName, type, default, [ fixed ]) 35 | 36 | Adds an AttDef node for the attribute with the specified name. 37 | 38 | Parameters: 39 | I the attribute name. 40 | I the attribute type (e.g. "CDATA" or "(male|female)".) 41 | I the default value enclosed in quotes (!), the string #IMPLIED or 42 | the string #REQUIRED. 43 | I whether the attribute is '#FIXED' (default is 0.) 44 | 45 | =back 46 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Attr.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Attr - An XML attribute in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Attr extends L. 8 | 9 | The Attr nodes built by the XML::DOM::Parser always have one child node 10 | which is a Text node containing the expanded string value (i.e. EntityReferences 11 | are always expanded.) EntityReferences may be added when modifying or creating 12 | a new Document. 13 | 14 | The Attr interface represents an attribute in an Element object. 15 | Typically the allowable values for the attribute are defined in a 16 | document type definition. 17 | 18 | Attr objects inherit the Node interface, but since they are not 19 | actually child nodes of the element they describe, the DOM does not 20 | consider them part of the document tree. Thus, the Node attributes 21 | parentNode, previousSibling, and nextSibling have an undef value for Attr 22 | objects.
The DOM takes the view that attributes are properties of 23 | elements rather than having a separate identity from the elements they 24 | are associated with; this should make it more efficient to implement 25 | such features as default attributes associated with all elements of a 26 | given type. Furthermore, Attr nodes may not be immediate children of a 27 | DocumentFragment. However, they can be associated with Element nodes 28 | contained within a DocumentFragment. In short, users and implementors 29 | of the DOM need to be aware that Attr nodes have some things in common 30 | with other objects inheriting the Node interface, but they also are 31 | quite distinct. 32 | 33 | The attribute's effective value is determined as follows: if this 34 | attribute has been explicitly assigned any value, that value is the 35 | attribute's effective value; otherwise, if there is a declaration for 36 | this attribute, and that declaration includes a default value, then 37 | that default value is the attribute's effective value; otherwise, the 38 | attribute does not exist on this element in the structure model until 39 | it has been explicitly added. Note that the nodeValue attribute on the 40 | Attr instance can also be used to retrieve the string version of the 41 | attribute's value(s). 42 | 43 | In XML, where the value of an attribute can contain entity references, 44 | the child nodes of the Attr node provide a representation in which 45 | entity references are not expanded. These child nodes may be either 46 | Text or EntityReference nodes. Because the attribute type may be 47 | unknown, there are no tokenized attribute values. 48 | 49 | =head2 METHODS 50 | 51 | =over 4 52 | 53 | =item getValue 54 | 55 | On retrieval, the value of the attribute is returned as a string. 56 | Character and general entity references are replaced with their values. 
57 | 58 | =item setValue (str) 59 | 60 | DOM Spec: On setting, this creates a Text node with the unparsed contents of the 61 | string. 62 | 63 | =item getName 64 | 65 | Returns the name of this attribute. 66 | 67 | =back 68 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/CDATASection.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::CDATASection - Escaping XML text blocks in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::CDATASection extends L which extends 8 | L. 9 | 10 | CDATA sections are used to escape blocks of text containing characters 11 | that would otherwise be regarded as markup. The only delimiter that is 12 | recognized in a CDATA section is the "]]>" string that ends the CDATA 13 | section. CDATA sections can not be nested. The primary purpose is for 14 | including material such as XML fragments, without needing to escape all 15 | the delimiters. 16 | 17 | The DOMString attribute of the Text node holds the text that is 18 | contained by the CDATA section. Note that this may contain characters 19 | that need to be escaped outside of CDATA sections and that, depending 20 | on the character encoding ("charset") chosen for serialization, it may 21 | be impossible to write out some characters as part of a CDATA section. 22 | 23 | The CDATASection interface inherits the CharacterData interface through 24 | the Text interface. Adjacent CDATASections nodes are not merged by use 25 | of the Element.normalize() method. 26 | 27 | B XML::DOM::Parser and XML::DOM::ValParser convert all CDATASections 28 | to regular text by default. 29 | To preserve CDATASections, set the parser option KeepCDATA to 1. 
30 | 31 | 32 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/CharacterData.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::CharacterData - Common interface for Text, CDATASections and Comments 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::CharacterData extends L 8 | 9 | The CharacterData interface extends Node with a set of attributes and 10 | methods for accessing character data in the DOM. For clarity this set 11 | is defined here rather than on each object that uses these attributes 12 | and methods. No DOM objects correspond directly to CharacterData, 13 | though Text, Comment and CDATASection do inherit the interface from it. 14 | All offsets in this interface start from 0. 15 | 16 | =head2 METHODS 17 | 18 | =over 4 19 | 20 | =item getData and setData (data) 21 | 22 | The character data of the node that implements this 23 | interface. The DOM implementation may not put arbitrary 24 | limits on the amount of data that may be stored in a 25 | CharacterData node. However, implementation limits may mean 26 | that the entirety of a node's data may not fit into a single 27 | DOMString. In such cases, the user may call substringData to 28 | retrieve the data in appropriately sized pieces. 29 | 30 | =item getLength 31 | 32 | The number of characters that are available through data and 33 | the substringData method below. This may have the value zero, 34 | i.e., CharacterData nodes may be empty. 35 | 36 | =item substringData (offset, count) 37 | 38 | Extracts a range of data from the node. 39 | 40 | Parameters: 41 | I Start offset of substring to extract. 42 | I The number of characters to extract. 43 | 44 | Return Value: The specified substring. If the sum of offset and count 45 | exceeds the length, then all characters to the end of 46 | the data are returned. 
47 | 48 | =item appendData (str) 49 | 50 | Appends the string to the end of the character data of the 51 | node. Upon success, data provides access to the concatenation 52 | of data and the DOMString specified. 53 | 54 | =item insertData (offset, arg) 55 | 56 | Inserts a string at the specified character offset. 57 | 58 | Parameters: 59 | I The character offset at which to insert. 60 | I The DOMString to insert. 61 | 62 | =item deleteData (offset, count) 63 | 64 | Removes a range of characters from the node. 65 | Upon success, data and length reflect the change. 66 | If the sum of offset and count exceeds length then all characters 67 | from offset to the end of the data are deleted. 68 | 69 | Parameters: 70 | I The offset from which to remove characters. 71 | I The number of characters to delete. 72 | 73 | =item replaceData (offset, count, arg) 74 | 75 | Replaces the characters starting at the specified character 76 | offset with the specified string. 77 | 78 | Parameters: 79 | I The offset from which to start replacing. 80 | I The number of characters to replace. 81 | I The DOMString with which the range must be replaced. 82 | 83 | If the sum of offset and count exceeds length, then all characters to the end of 84 | the data are replaced (i.e., the effect is the same as a remove method call with 85 | the same range, followed by an append method invocation). 86 | 87 | =back 88 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Comment.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Comment - An XML comment in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Comment extends L which extends 8 | L. 9 | 10 | This node represents the content of a comment, i.e., all the characters 11 | between the starting ''. 
Note that this is the 12 | definition of a comment in XML, and, in practice, HTML, although some 13 | HTML tools may implement the full SGML comment structure. 14 | 15 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/DOMException.pm: -------------------------------------------------------------------------------- 1 | ###################################################################### 2 | package XML::DOM::DOMException; 3 | ###################################################################### 4 | 5 | use Exporter; 6 | 7 | use overload '""' => \&stringify; 8 | use vars qw ( @ISA @EXPORT @ErrorNames ); 9 | 10 | BEGIN 11 | { 12 | @ISA = qw( Exporter ); 13 | @EXPORT = qw( INDEX_SIZE_ERR 14 | DOMSTRING_SIZE_ERR 15 | HIERARCHY_REQUEST_ERR 16 | WRONG_DOCUMENT_ERR 17 | INVALID_CHARACTER_ERR 18 | NO_DATA_ALLOWED_ERR 19 | NO_MODIFICATION_ALLOWED_ERR 20 | NOT_FOUND_ERR 21 | NOT_SUPPORTED_ERR 22 | INUSE_ATTRIBUTE_ERR 23 | ); 24 | } 25 | 26 | sub UNKNOWN_ERR () {0;} # not in the DOM Spec! 27 | sub INDEX_SIZE_ERR () {1;} 28 | sub DOMSTRING_SIZE_ERR () {2;} 29 | sub HIERARCHY_REQUEST_ERR () {3;} 30 | sub WRONG_DOCUMENT_ERR () {4;} 31 | sub INVALID_CHARACTER_ERR () {5;} 32 | sub NO_DATA_ALLOWED_ERR () {6;} 33 | sub NO_MODIFICATION_ALLOWED_ERR () {7;} 34 | sub NOT_FOUND_ERR () {8;} 35 | sub NOT_SUPPORTED_ERR () {9;} 36 | sub INUSE_ATTRIBUTE_ERR () {10;} 37 | 38 | @ErrorNames = ( 39 | "UNKNOWN_ERR", 40 | "INDEX_SIZE_ERR", 41 | "DOMSTRING_SIZE_ERR", 42 | "HIERARCHY_REQUEST_ERR", 43 | "WRONG_DOCUMENT_ERR", 44 | "INVALID_CHARACTER_ERR", 45 | "NO_DATA_ALLOWED_ERR", 46 | "NO_MODIFICATION_ALLOWED_ERR", 47 | "NOT_FOUND_ERR", 48 | "NOT_SUPPORTED_ERR", 49 | "INUSE_ATTRIBUTE_ERR" 50 | ); 51 | sub new 52 | { 53 | my ($type, $code, $msg) = @_; 54 | my $self = bless {Code => $code}, $type; 55 | 56 | $self->{Message} = $msg if defined $msg; 57 | 58 | # print "=> Exception: " . $self->stringify . 
"\n"; 59 | $self; 60 | } 61 | 62 | sub getCode 63 | { 64 | $_[0]->{Code}; 65 | } 66 | 67 | #------------------------------------------------------------ 68 | # Extra method implementations 69 | 70 | sub getName 71 | { 72 | $ErrorNames[$_[0]->{Code}]; 73 | } 74 | 75 | sub getMessage 76 | { 77 | $_[0]->{Message}; 78 | } 79 | 80 | sub stringify 81 | { 82 | my $self = shift; 83 | 84 | "XML::DOM::DOMException(Code=" . $self->getCode . ", Name=" . 85 | $self->getName . ", Message=" . $self->getMessage . ")"; 86 | } 87 | 88 | 1; # package return code 89 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/DOMImplementation.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::DOMImplementation - Information about XML::DOM implementation 4 | 5 | =head1 DESCRIPTION 6 | 7 | The DOMImplementation interface provides a number of methods for 8 | performing operations that are independent of any particular instance 9 | of the document object model. 10 | 11 | The DOM Level 1 does not specify a way of creating a document instance, 12 | and hence document creation is an operation specific to an 13 | implementation. Future Levels of the DOM specification are expected to 14 | provide methods for creating documents directly. 15 | 16 | =head2 METHODS 17 | 18 | =over 4 19 | 20 | =item hasFeature (feature, version) 21 | 22 | Returns 1 if and only if feature equals "XML" and version equals "1.0". 23 | 24 | =back 25 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Document.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Document - An XML document node in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Document extends L. 
8 | 9 | It is the main root of the XML document structure as returned by 10 | XML::DOM::Parser::parse and XML::DOM::Parser::parsefile. 11 | 12 | Since elements, text nodes, comments, processing instructions, etc. 13 | cannot exist outside the context of a Document, the Document interface 14 | also contains the factory methods needed to create these objects. The 15 | Node objects created have a getOwnerDocument method which associates 16 | them with the Document within whose context they were created. 17 | 18 | =head2 METHODS 19 | 20 | =over 4 21 | 22 | =item getDocumentElement 23 | 24 | This is a convenience method that allows direct access to 25 | the child node that is the root Element of the document. 26 | 27 | =item getDoctype 28 | 29 | The Document Type Declaration (see DocumentType) associated 30 | with this document. For HTML documents as well as XML 31 | documents without a document type declaration this returns 32 | undef. The DOM Level 1 does not support editing the Document 33 | Type Declaration. 34 | 35 | B: This implementation allows editing the doctype. 36 | See I for details. 37 | 38 | =item getImplementation 39 | 40 | The DOMImplementation object that handles this document. A 41 | DOM application may use objects from multiple implementations. 42 | 43 | =item createElement (tagName) 44 | 45 | Creates an element of the type specified. Note that the 46 | instance returned implements the Element interface, so 47 | attributes can be specified directly on the returned object. 48 | 49 | DOMExceptions: 50 | 51 | =over 4 52 | 53 | =item * INVALID_CHARACTER_ERR 54 | 55 | Raised if the tagName does not conform to the XML spec. 56 | 57 | =back 58 | 59 | =item createTextNode (data) 60 | 61 | Creates a Text node given the specified string. 62 | 63 | =item createComment (data) 64 | 65 | Creates a Comment node given the specified string. 66 | 67 | =item createCDATASection (data) 68 | 69 | Creates a CDATASection node given the specified string. 
70 | 71 | =item createAttribute (name [, value [, specified ]]) 72 | 73 | Creates an Attr of the given name. Note that the Attr 74 | instance can then be set on an Element using the setAttribute method. 75 | 76 | B: The DOM Spec does not allow passing the value or the 77 | specified property in this method. In this implementation they are optional. 78 | 79 | Parameters: 80 | I The attribute's value. See Attr::setValue for details. 81 | If the value is not supplied, the specified property is set to 0. 82 | I Whether the attribute value was specified or whether the default 83 | value was used. If not supplied, it's assumed to be 1. 84 | 85 | DOMExceptions: 86 | 87 | =over 4 88 | 89 | =item * INVALID_CHARACTER_ERR 90 | 91 | Raised if the name does not conform to the XML spec. 92 | 93 | =back 94 | 95 | =item createProcessingInstruction (target, data) 96 | 97 | Creates a ProcessingInstruction node given the specified name and data strings. 98 | 99 | Parameters: 100 | I The target part of the processing instruction. 101 | I The data for the node. 102 | 103 | DOMExceptions: 104 | 105 | =over 4 106 | 107 | =item * INVALID_CHARACTER_ERR 108 | 109 | Raised if the target does not conform to the XML spec. 110 | 111 | =back 112 | 113 | =item createDocumentFragment 114 | 115 | Creates an empty DocumentFragment object. 116 | 117 | =item createEntityReference (name) 118 | 119 | Creates an EntityReference object. 120 | 121 | =back 122 | 123 | =head2 Additional methods not in the DOM Spec 124 | 125 | =over 4 126 | 127 | =item getXMLDecl and setXMLDecl (xmlDecl) 128 | 129 | Returns the XMLDecl for this Document or undef if none was specified. 130 | Note that XMLDecl is not part of the list of child nodes. 131 | 132 | =item setDoctype (doctype) 133 | 134 | Sets or replaces the DocumentType. 135 | B: Don't use appendChild or insertBefore to set the DocumentType. 136 | Even though doctype will be part of the list of child nodes, it is handled 137 | specially. 
138 | 139 | =item getDefaultAttrValue (elem, attr) 140 | 141 | Returns the default attribute value as a string or undef, if none is available. 142 | 143 | Parameters: 144 | I The element tagName. 145 | I The attribute name. 146 | 147 | =item getEntity (name) 148 | 149 | Returns the Entity with the specified name. 150 | 151 | =item createXMLDecl (version, encoding, standalone) 152 | 153 | Creates an XMLDecl object. All parameters may be undefined. 154 | 155 | =item createDocumentType (name, sysId, pubId) 156 | 157 | Creates a DocumentType object. SysId and pubId may be undefined. 158 | 159 | =item createNotation (name, base, sysId, pubId) 160 | 161 | Creates a new Notation object. Consider using 162 | XML::DOM::DocumentType::addNotation! 163 | 164 | =item createEntity (parameter, notationName, value, sysId, pubId, ndata) 165 | 166 | Creates an Entity object. Consider using XML::DOM::DocumentType::addEntity! 167 | 168 | =item createElementDecl (name, model) 169 | 170 | Creates an ElementDecl object. 171 | 172 | DOMExceptions: 173 | 174 | =over 4 175 | 176 | =item * INVALID_CHARACTER_ERR 177 | 178 | Raised if the element name (tagName) does not conform to the XML spec. 179 | 180 | =back 181 | 182 | =item createAttlistDecl (name) 183 | 184 | Creates an AttlistDecl object. 185 | 186 | DOMExceptions: 187 | 188 | =over 4 189 | 190 | =item * INVALID_CHARACTER_ERR 191 | 192 | Raised if the element name (tagName) does not conform to the XML spec. 193 | 194 | =back 195 | 196 | =item expandEntity (entity [, parameter]) 197 | 198 | Expands the specified entity or parameter entity (if parameter=1) and returns 199 | its value as a string, or undef if the entity does not exist. 200 | (The entity name should not contain the '%', '&' or ';' delimiters.) 201 | 202 | =item check ( [$checker] ) 203 | 204 | Uses the specified L to validate the document. 205 | If no XML::Checker is supplied, a new XML::Checker is created. 206 | See L for details. 
207 | 208 | =item check_sax ( [$checker] ) 209 | 210 | Similar to check() except it uses the SAX interface to XML::Checker instead of 211 | the expat interface. This method may disappear or replace check() at some time. 212 | 213 | =item createChecker () 214 | 215 | Creates an XML::Checker based on the document's DTD. 216 | The $checker can be reused to check any elements within the document. 217 | Create a new L whenever the DOCTYPE section of the document 218 | is altered! 219 | 220 | =back 221 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/DocumentFragment.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::DocumentFragment - Facilitates cut & paste in XML::DOM documents 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::DocumentFragment extends L 8 | 9 | DocumentFragment is a "lightweight" or "minimal" Document object. It is 10 | very common to want to be able to extract a portion of a document's 11 | tree or to create a new fragment of a document. Imagine implementing a 12 | user command like cut or rearranging a document by moving fragments 13 | around. It is desirable to have an object which can hold such fragments 14 | and it is quite natural to use a Node for this purpose. While it is 15 | true that a Document object could fulfil this role, a Document object 16 | can potentially be a heavyweight object, depending on the underlying 17 | implementation. What is really needed for this is a very lightweight 18 | object. DocumentFragment is such an object. 19 | 20 | Furthermore, various operations -- such as inserting nodes as children 21 | of another Node -- may take DocumentFragment objects as arguments; this 22 | results in all the child nodes of the DocumentFragment being moved to 23 | the child list of this node. 
24 | 25 | The children of a DocumentFragment node are zero or more nodes 26 | representing the tops of any sub-trees defining the structure of the 27 | document. DocumentFragment nodes do not need to be well-formed XML 28 | documents (although they do need to follow the rules imposed upon 29 | well-formed XML parsed entities, which can have multiple top nodes). 30 | For example, a DocumentFragment might have only one child and that 31 | child node could be a Text node. Such a structure model represents 32 | neither an HTML document nor a well-formed XML document. 33 | 34 | When a DocumentFragment is inserted into a Document (or indeed any 35 | other Node that may take children) the children of the DocumentFragment 36 | and not the DocumentFragment itself are inserted into the Node. This 37 | makes the DocumentFragment very useful when the user wishes to create 38 | nodes that are siblings; the DocumentFragment acts as the parent of 39 | these nodes so that the user can use the standard methods from the Node 40 | interface, such as insertBefore() and appendChild(). 41 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/DocumentType.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::DocumentType - An XML document type (DTD) in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::DocumentType extends L. 8 | 9 | Each Document has a doctype attribute whose value is either null or a 10 | DocumentType object. The DocumentType interface in the DOM Level 1 Core 11 | provides an interface to the list of entities that are defined for the 12 | document, and little else because the effect of namespaces and the 13 | various XML scheme efforts on DTD representation are not clearly 14 | understood as of this writing. 15 | The DOM Level 1 doesn't support editing DocumentType nodes. 
16 | 17 | B: This implementation has added a lot of extra 18 | functionality to the DOM Level 1 interface. 19 | To allow editing of the DocumentType nodes, see XML::DOM::ignoreReadOnly. 20 | 21 | =head2 METHODS 22 | 23 | =over 4 24 | 25 | =item getName 26 | 27 | Returns the name of the DTD, i.e. the name immediately following the 28 | DOCTYPE keyword. 29 | 30 | =item getEntities 31 | 32 | A NamedNodeMap containing the general entities, both external 33 | and internal, declared in the DTD. Duplicates are discarded. 34 | For example in: 35 | 36 | 38 | 39 | 40 | ]> 41 | 42 | 43 | the interface provides access to foo and bar but not baz. 44 | Every node in this map also implements the Entity interface. 45 | 46 | The DOM Level 1 does not support editing entities, therefore 47 | entities cannot be altered in any way. 48 | 49 | B: See XML::DOM::ignoreReadOnly to edit the DocumentType etc. 50 | 51 | =item getNotations 52 | 53 | A NamedNodeMap containing the notations declared in the DTD. 54 | Duplicates are discarded. Every node in this map also 55 | implements the Notation interface. 56 | 57 | The DOM Level 1 does not support editing notations, therefore 58 | notations cannot be altered in any way. 59 | 60 | B: See XML::DOM::ignoreReadOnly to edit the DocumentType etc. 61 | 62 | =head2 Additional methods not in the DOM Spec 63 | 64 | =item Creating and setting the DocumentType 65 | 66 | A new DocumentType can be created with: 67 | 68 | $doctype = $doc->createDocumentType ($name, $sysId, $pubId, $internal); 69 | 70 | To set (or replace) the DocumentType for a particular document, use: 71 | 72 | $doc->setDocType ($doctype); 73 | 74 | =item getSysId and setSysId (sysId) 75 | 76 | Returns or sets the system id. 77 | 78 | =item getPubId and setPubId (pubId) 79 | 80 | Returns or sets the public id. 81 | 82 | =item setName (name) 83 | 84 | Sets the name of the DTD, i.e. the name immediately following the 85 | DOCTYPE keyword.
Note that this should always be the same as the element 86 | tag name of the root element. 87 | 88 | =item getAttlistDecl (elemName) 89 | 90 | Returns the AttlistDecl for the Element with the specified name, or undef. 91 | 92 | =item getElementDecl (elemName) 93 | 94 | Returns the ElementDecl for the Element with the specified name, or undef. 95 | 96 | =item getEntity (entityName) 97 | 98 | Returns the Entity with the specified name, or undef. 99 | 100 | =item addAttlistDecl (elemName) 101 | 102 | Adds a new AttlistDecl node with the specified elemName if one doesn't exist yet. 103 | Returns the AttlistDecl (new or existing) node. 104 | 105 | =item addElementDecl (elemName, model) 106 | 107 | Adds a new ElementDecl node with the specified elemName and model if one doesn't 108 | exist yet. 109 | Returns the ElementDecl (new or existing) node. The model is ignored if one 110 | already existed. 111 | 112 | =item addEntity (notationName, value, sysId, pubId, ndata, parameter) 113 | 114 | Adds a new Entity node. Don't use createEntity and appendChild, because it should 115 | be added to the internal NamedNodeMap containing the entities. 116 | 117 | Parameters: 118 | I<notationName> the entity name. 119 | I<value> the entity value. 120 | I<sysId> the system id (if any.) 121 | I<pubId> the public id (if any.) 122 | I<ndata> the NDATA declaration (if any, for general unparsed entities.) 123 | I<parameter> whether it is a parameter entity (%ent;) or not (&ent;). 124 | 125 | SysId, pubId and ndata may be undefined. 126 | 127 | DOMExceptions: 128 | 129 | =over 4 130 | 131 | =item * INVALID_CHARACTER_ERR 132 | 133 | Raised if the notationName does not conform to the XML spec. 134 | 135 | =back 136 | 137 | =item addNotation (name, base, sysId, pubId) 138 | 139 | Adds a new Notation object. 140 | 141 | Parameters: 142 | I<name> the notation name. 143 | I<base> the base to be used for resolving a relative URI. 144 | I<sysId> the system id. 145 | I<pubId> the public id. 146 | 147 | Base, sysId, and pubId may all be undefined.
148 | (These parameters are passed by the XML::Parser Notation handler.) 149 | 150 | DOMExceptions: 151 | 152 | =over 4 153 | 154 | =item * INVALID_CHARACTER_ERR 155 | 156 | Raised if the notationName does not conform to the XML spec. 157 | 158 | =back 159 | 160 | =item addAttDef (elemName, attrName, type, default, fixed) 161 | 162 | Adds a new attribute definition. It will add the AttDef node to the AttlistDecl 163 | if it exists. If an AttDef with the specified attrName already exists for the 164 | given elemName, this function only generates a warning. 165 | 166 | See XML::DOM::AttDef::new for the other parameters. 167 | 168 | =item getDefaultAttrValue (elem, attr) 169 | 170 | Returns the default attribute value as a string or undef, if none is available. 171 | 172 | Parameters: 173 | I The element tagName. 174 | I The attribute name. 175 | 176 | =item expandEntity (entity [, parameter]) 177 | 178 | Expands the specified entity or parameter entity (if parameter=1) and returns 179 | its value as a string, or undef if the entity does not exist. 180 | (The entity name should not contain the '%', '&' or ';' delimiters.) 181 | 182 | =back 183 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Element.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Element - An XML element node in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Element extends L. 8 | 9 | By far the vast majority of objects (apart from text) that authors 10 | encounter when traversing a document are Element nodes. Assume the 11 | following XML document: 12 | 13 | 14 | 15 | 16 | 17 | 18 | When represented using DOM, the top node is an Element node for 19 | "elementExample", which contains two child Element nodes, one for 20 | "subelement1" and one for "subelement2". "subelement1" contains no 21 | child nodes. 
22 | 23 | Elements may have attributes associated with them; since the Element 24 | interface inherits from Node, the generic Node interface method 25 | getAttributes may be used to retrieve the set of all attributes for an 26 | element. There are methods on the Element interface to retrieve either 27 | an Attr object by name or an attribute value by name. In XML, where an 28 | attribute value may contain entity references, an Attr object should be 29 | retrieved to examine the possibly fairly complex sub-tree representing 30 | the attribute value. On the other hand, in HTML, where all attributes 31 | have simple string values, methods to directly access an attribute 32 | value can safely be used as a convenience. 33 | 34 | =head2 METHODS 35 | 36 | =over 4 37 | 38 | =item getTagName 39 | 40 | The name of the element. For example, in: 41 | 42 | 43 | ... 44 | 45 | 46 | tagName has the value "elementExample". Note that this is 47 | case-preserving in XML, as are all of the operations of the 48 | DOM. 49 | 50 | =item getAttribute (name) 51 | 52 | Retrieves an attribute value by name. 53 | 54 | Return Value: The Attr value as a string, or the empty string if that 55 | attribute does not have a specified or default value. 56 | 57 | =item setAttribute (name, value) 58 | 59 | Adds a new attribute. If an attribute with that name is 60 | already present in the element, its value is changed to be 61 | that of the value parameter. This value is a simple string, 62 | it is not parsed as it is being set. So any markup (such as 63 | syntax to be recognized as an entity reference) is treated as 64 | literal text, and needs to be appropriately escaped by the 65 | implementation when it is written out. In order to assign an 66 | attribute value that contains entity references, the user 67 | must create an Attr node plus any Text and EntityReference 68 | nodes, build the appropriate subtree, and use 69 | setAttributeNode to assign it as the value of an attribute. 
70 | 71 | 72 | DOMExceptions: 73 | 74 | =over 4 75 | 76 | =item * INVALID_CHARACTER_ERR 77 | 78 | Raised if the specified name contains an invalid character. 79 | 80 | =item * NO_MODIFICATION_ALLOWED_ERR 81 | 82 | Raised if this node is readonly. 83 | 84 | =back 85 | 86 | =item removeAttribute (name) 87 | 88 | Removes an attribute by name. If the removed attribute has a 89 | default value it is immediately replaced. 90 | 91 | DOMExceptions: 92 | 93 | =over 4 94 | 95 | =item * NO_MODIFICATION_ALLOWED_ERR 96 | 97 | Raised if this node is readonly. 98 | 99 | =back 100 | 101 | =item getAttributeNode 102 | 103 | Retrieves an Attr node by name. 104 | 105 | Return Value: The Attr node with the specified attribute name or undef 106 | if there is no such attribute. 107 | 108 | =item setAttributeNode (attr) 109 | 110 | Adds a new attribute. If an attribute with that name is 111 | already present in the element, it is replaced by the new one. 112 | 113 | Return Value: If the newAttr attribute replaces an existing attribute 114 | with the same name, the previously existing Attr node is 115 | returned, otherwise undef is returned. 116 | 117 | DOMExceptions: 118 | 119 | =over 4 120 | 121 | =item * WRONG_DOCUMENT_ERR 122 | 123 | Raised if newAttr was created from a different document than the one that created 124 | the element. 125 | 126 | =item * NO_MODIFICATION_ALLOWED_ERR 127 | 128 | Raised if this node is readonly. 129 | 130 | =item * INUSE_ATTRIBUTE_ERR 131 | 132 | Raised if newAttr is already an attribute of another Element object. The DOM 133 | user must explicitly clone Attr nodes to re-use them in other elements. 134 | 135 | =back 136 | 137 | =item removeAttributeNode (oldAttr) 138 | 139 | Removes the specified attribute. If the removed Attr has a default value it is 140 | immediately replaced. If the Attr already is the default value, nothing happens 141 | and nothing is returned. 142 | 143 | Parameters: 144 | I The Attr node to remove from the attribute list. 
145 | 146 | Return Value: The Attr node that was removed. 147 | 148 | DOMExceptions: 149 | 150 | =over 4 151 | 152 | =item * NO_MODIFICATION_ALLOWED_ERR 153 | 154 | Raised if this node is readonly. 155 | 156 | =item * NOT_FOUND_ERR 157 | 158 | Raised if oldAttr is not an attribute of the element. 159 | 160 | =back 161 | 162 | =head2 Additional methods not in the DOM Spec 163 | 164 | =over 4 165 | 166 | =item setTagName (newTagName) 167 | 168 | Sets the tag name of the Element. Note that this method is not portable 169 | between DOM implementations. 170 | 171 | DOMExceptions: 172 | 173 | =over 4 174 | 175 | =item * INVALID_CHARACTER_ERR 176 | 177 | Raised if the specified name contains an invalid character. 178 | 179 | =back 180 | 181 | =item check ($checker) 182 | 183 | Uses the specified L to validate the document. 184 | NOTE: an XML::Checker must be supplied. The checker can be created in 185 | different ways, e.g. when parsing a document with XML::DOM::ValParser, 186 | or with XML::DOM::Document::createChecker(). 187 | See L for more info. 188 | 189 | =back 190 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/ElementDecl.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::ElementDecl - An XML ELEMENT declaration in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::ElementDecl extends L but is not part of the 8 | DOM Level 1 specification. 9 | 10 | This node represents an Element declaration, e.g. 11 | 12 | 13 | 14 | =head2 METHODS 15 | 16 | =over 4 17 | 18 | =item getName 19 | 20 | Returns the Element tagName. 21 | 22 | =item getModel and setModel (model) 23 | 24 | Returns and sets the model as a string, e.g. 25 | "(street+, city, state, zip, country?)" in the above example. 
26 | 27 | =back 28 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Entity.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Entity - An XML ENTITY in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Entity extends L. 8 | 9 | This node represents an Entity declaration, e.g. 10 | 11 | 12 | 13 | 14 | 15 | The first one is called a parameter entity and is referenced like this: %draft; 16 | The 2nd is a (regular) entity and is referenced like this: &hatch-pic; 17 | 18 | =head2 METHODS 19 | 20 | =over 4 21 | 22 | =item getNotationName 23 | 24 | Returns the name of the notation for the entity. 25 | 26 | I The DOM Spec says: For unparsed entities, the name of the 27 | notation for the entity. For parsed entities, this is null. 28 | (This implementation does not support unparsed entities.) 29 | 30 | =item getSysId 31 | 32 | Returns the system id, or undef. 33 | 34 | =item getPubId 35 | 36 | Returns the public id, or undef. 37 | 38 | =back 39 | 40 | =head2 Additional methods not in the DOM Spec 41 | 42 | =over 4 43 | 44 | =item isParameterEntity 45 | 46 | Whether it is a parameter entity (%ent;) or not (&ent;) 47 | 48 | =item getValue 49 | 50 | Returns the entity value. 51 | 52 | =item getNdata 53 | 54 | Returns the NDATA declaration (for general unparsed entities), or undef. 55 | 56 | =back 57 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/EntityReference.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::EntityReference - An XML ENTITY reference in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::EntityReference extends L. 8 | 9 | EntityReference objects may be inserted into the structure model when 10 | an entity reference is in the source document, or when the user wishes 11 | to insert an entity reference. 
Note that character references and 12 | references to predefined entities are considered to be expanded by the 13 | HTML or XML processor so that characters are represented by their 14 | Unicode equivalent rather than by an entity reference. Moreover, the 15 | XML processor may completely expand references to entities while 16 | building the structure model, instead of providing EntityReference 17 | objects. If it does provide such objects, then for a given 18 | EntityReference node, it may be that there is no Entity node 19 | representing the referenced entity; but if such an Entity exists, then 20 | the child list of the EntityReference node is the same as that of the 21 | Entity node. As with the Entity node, all descendants of the 22 | EntityReference are readonly. 23 | 24 | The resolution of the children of the EntityReference (the replacement 25 | value of the referenced Entity) may be lazily evaluated; actions by the 26 | user (such as calling the childNodes method on the EntityReference 27 | node) are assumed to trigger the evaluation. 
28 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/NamedNodeMap.pm: -------------------------------------------------------------------------------- 1 | ###################################################################### 2 | package XML::DOM::NamedNodeMap; 3 | ###################################################################### 4 | 5 | use strict; 6 | 7 | use Carp; 8 | use XML::DOM::DOMException; 9 | use XML::DOM::NodeList; 10 | 11 | use vars qw( $Special ); 12 | 13 | # Constant definition: 14 | # Note: a real Name should have at least 1 char, so nobody else should use this 15 | $Special = ""; 16 | 17 | sub new # Constructor (class, key => value properties): adds a new NodeList as the Values property and keeps all properties in the hash entry keyed by $Special. 18 | { 19 | my ($class, %args) = @_; 20 | 21 | $args{Values} = new XML::DOM::NodeList; 22 | 23 | # Store all NamedNodeMap properties in element $Special 24 | bless { $Special => \%args}, $class; 25 | } 26 | 27 | sub getNamedItem # Returns the node stored under the given name, or undef. 28 | { 29 | # Don't return the $Special item! 30 | ($_[1] eq $Special) ? undef : $_[0]->{$_[1]}; 31 | } 32 | 33 | sub setNamedItem # Stores $node under its getNodeName and returns the node previously stored under that name (undef if none). The DOMException checks run only when $XML::DOM::SafeMode is set. 34 | { 35 | my ($self, $node) = @_; 36 | my $prop = $self->{$Special}; 37 | 38 | my $name = $node->getNodeName; 39 | 40 | if ($XML::DOM::SafeMode) 41 | { 42 | croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR) 43 | if $self->isReadOnly; 44 | 45 | croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR) 46 | if $node->[XML::DOM::Node::_Doc] != $prop->{Doc}; 47 | 48 | croak new XML::DOM::DOMException (INUSE_ATTRIBUTE_ERR) 49 | if defined ($node->[XML::DOM::Node::_UsedIn]); 50 | 51 | croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, 52 | "can't add name with NodeName [$name] to NamedNodeMap") 53 | if $name eq $Special; 54 | } 55 | 56 | my $values = $prop->{Values}; 57 | my $index = -1; 58 | 59 | my $prev = $self->{$name}; 60 | if (defined $prev) 61 | { 62 | # decouple previous node 63 | $prev->decoupleUsedIn; 64 | 65 | # find index of $prev 66 | $index = 0; 67 | for my $val (@{$values}) 68 | { 69 | last if ($val == $prev); 70 |
$index++; 71 | } 72 | } 73 | 74 | $self->{$name} = $node; 75 | $node->[XML::DOM::Node::_UsedIn] = $self; 76 | 77 | if ($index == -1) 78 | { 79 | push (@{$values}, $node); 80 | } 81 | else # replace previous node with new node 82 | { 83 | splice (@{$values}, $index, 1, $node); 84 | } 85 | 86 | $prev; 87 | } 88 | 89 | sub removeNamedItem # Removes and returns the node stored under $name. Croaks with NOT_FOUND_ERR if the name is $Special or has no node, and with NO_MODIFICATION_ALLOWED_ERR if isReadOnly is true. 90 | { 91 | my ($self, $name) = @_; 92 | 93 | # Be careful that user doesn't delete $Special node! 94 | croak new XML::DOM::DOMException (NOT_FOUND_ERR) 95 | if $name eq $Special; 96 | 97 | my $node = $self->{$name}; 98 | 99 | croak new XML::DOM::DOMException (NOT_FOUND_ERR) 100 | unless defined $node; 101 | 102 | # The DOM Spec doesn't mention this Exception - I think it's an oversight 103 | croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR) 104 | if $self->isReadOnly; 105 | 106 | $node->decoupleUsedIn; 107 | delete $self->{$name}; 108 | 109 | # remove node from Values list 110 | my $values = $self->getValues; 111 | my $index = 0; 112 | for my $val (@{$values}) 113 | { 114 | if ($val == $node) 115 | { 116 | splice (@{$values}, $index, 1, ()); 117 | last; 118 | } 119 | $index++; 120 | } 121 | $node; 122 | } 123 | 124 | # The following 2 are really bogus. DOM should use an iterator instead (Clark) 125 | 126 | sub item # Returns the node at position $item of the Values list. 127 | { 128 | my ($self, $item) = @_; 129 | $self->{$Special}->{Values}->[$item]; 130 | } 131 | 132 | sub getLength # Returns the number of nodes in the Values list. 133 | { 134 | my ($self) = @_; 135 | my $vals = $self->{$Special}->{Values}; 136 | int (@$vals); 137 | } 138 | 139 | #------------------------------------------------------------ 140 | # Extra method implementations 141 | 142 | sub isReadOnly # Returns 0 when $XML::DOM::IgnoreReadOnly is set; otherwise defers to isReadOnly of the UsedIn property, or returns 0 if UsedIn is undefined. 143 | { 144 | return 0 if $XML::DOM::IgnoreReadOnly; 145 | 146 | my $used = $_[0]->{$Special}->{UsedIn}; 147 | defined $used ?
$used->isReadOnly : 0; 148 | } 149 | 150 | sub cloneNode # Returns a new map with the same Doc property whose values are cloneNode($deep) copies of this map's values, in the same order. 151 | { 152 | my ($self, $deep) = @_; 153 | my $prop = $self->{$Special}; 154 | 155 | my $map = new XML::DOM::NamedNodeMap (Doc => $prop->{Doc}); 156 | # Not copying Parent property on purpose! 157 | 158 | local $XML::DOM::IgnoreReadOnly = 1; # temporarily... 159 | 160 | for my $val (@{$prop->{Values}}) 161 | { 162 | my $key = $val->getNodeName; 163 | 164 | my $newNode = $val->cloneNode ($deep); 165 | $newNode->[XML::DOM::Node::_UsedIn] = $map; 166 | $map->{$key} = $newNode; 167 | push (@{$map->{$Special}->{Values}}, $newNode); 168 | } 169 | 170 | $map; 171 | } 172 | 173 | sub setOwnerDocument # Sets the Doc property to $doc and calls setOwnerDocument ($doc) on every value. 174 | { 175 | my ($self, $doc) = @_; 176 | my $special = $self->{$Special}; 177 | 178 | $special->{Doc} = $doc; 179 | for my $kid (@{$special->{Values}}) 180 | { 181 | $kid->setOwnerDocument ($doc); 182 | } 183 | } 184 | 185 | sub getChildIndex # Returns the position of $attr in the Values list, or -1 if it is not there. 186 | { 187 | my ($self, $attr) = @_; 188 | my $i = 0; 189 | for my $kid (@{$self->{$Special}->{Values}}) 190 | { 191 | return $i if $kid == $attr; 192 | $i++; 193 | } 194 | -1; # not found 195 | } 196 | 197 | sub getValues # In list context returns the values as a list; otherwise returns the Values property itself. 198 | { 199 | wantarray ? @{ $_[0]->{$Special}->{Values} } : $_[0]->{$Special}->{Values}; 200 | } 201 | 202 | # Remove circular dependencies. The NamedNodeMap and its values should 203 | # not be used afterwards. 204 | sub dispose 205 | { 206 | my $self = shift; 207 | 208 | for my $kid (@{$self->getValues}) 209 | { 210 | undef $kid->[XML::DOM::Node::_UsedIn]; # was delete 211 | $kid->dispose; 212 | } 213 | 214 | delete $self->{$Special}->{Doc}; 215 | delete $self->{$Special}->{Parent}; 216 | delete $self->{$Special}->{Values}; 217 | 218 | for my $key (keys %$self) 219 | { 220 | delete $self->{$key}; 221 | } 222 | } 223 | 224 | sub setParentNode # Stores the given node as the Parent property. 225 | { 226 | $_[0]->{$Special}->{Parent} = $_[1]; 227 | } 228 | 229 | sub getProperty # Returns the property with the given name from the $Special entry. 230 | { 231 | $_[0]->{$Special}->{$_[1]}; 232 | } 233 | 234 | #??
remove after debugging 235 | sub toString # Builds a debugging string listing every key of the map; the $Special entry is expanded property by property. 236 | { 237 | my ($self) = @_; 238 | my $str = "NamedNodeMap["; 239 | while (my ($key, $val) = each %$self) 240 | { 241 | if ($key eq $Special) 242 | { 243 | $str .= "##Special ("; 244 | while (my ($k, $v) = each %$val) 245 | { 246 | if ($k eq "Values") 247 | { 248 | $str .= $k . " => ["; 249 | for my $a (@$v) 250 | { 251 | # $str .= $a->getNodeName . "=" . $a . ","; 252 | $str .= $a->toString . ","; 253 | } 254 | $str .= "], "; 255 | } 256 | else 257 | { 258 | $str .= $k . " => " . $v . ", "; 259 | } 260 | } 261 | $str .= "), "; 262 | } 263 | else 264 | { 265 | $str .= $key . " => " . $val . ", "; 266 | } 267 | } 268 | $str . "]"; 269 | } 270 | 271 | 1; # package return code 272 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/NamedNodeMap.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::NamedNodeMap - A hash table interface for XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | Objects implementing the NamedNodeMap interface are used to represent 8 | collections of nodes that can be accessed by name. Note that 9 | NamedNodeMap does not inherit from NodeList; NamedNodeMaps are not 10 | maintained in any particular order. Objects contained in an object 11 | implementing NamedNodeMap may also be accessed by an ordinal index, but 12 | this is simply to allow convenient enumeration of the contents of a 13 | NamedNodeMap, and does not imply that the DOM specifies an order to 14 | these Nodes. 15 | 16 | Note that in this implementation, the objects added to a NamedNodeMap 17 | are kept in order. 18 | 19 | =head2 METHODS 20 | 21 | =over 4 22 | 23 | =item getNamedItem (name) 24 | 25 | Retrieves a node specified by name. 26 | 27 | Return Value: A Node (of any type) with the specified name, or undef if 28 | the specified name did not identify any node in the map.
29 | 30 | =item setNamedItem (arg) 31 | 32 | Adds a node using its nodeName attribute. 33 | 34 | As the nodeName attribute is used to derive the name which 35 | the node must be stored under, multiple nodes of certain 36 | types (those that have a "special" string value) cannot be 37 | stored as the names would clash. This is seen as preferable 38 | to allowing nodes to be aliased. 39 | 40 | Parameters: 41 | I A node to store in a named node map. 42 | 43 | The node will later be accessible using the value of the nodeName 44 | attribute of the node. If a node with that name is 45 | already present in the map, it is replaced by the new one. 46 | 47 | Return Value: If the new Node replaces an existing node with the same 48 | name the previously existing Node is returned, otherwise undef is returned. 49 | 50 | DOMExceptions: 51 | 52 | =over 4 53 | 54 | =item * WRONG_DOCUMENT_ERR 55 | 56 | Raised if arg was created from a different document than the one that 57 | created the NamedNodeMap. 58 | 59 | =item * NO_MODIFICATION_ALLOWED_ERR 60 | 61 | Raised if this NamedNodeMap is readonly. 62 | 63 | =item * INUSE_ATTRIBUTE_ERR 64 | 65 | Raised if arg is an Attr that is already an attribute of another Element object. 66 | The DOM user must explicitly clone Attr nodes to re-use them in other elements. 67 | 68 | =back 69 | 70 | =item removeNamedItem (name) 71 | 72 | Removes a node specified by name. If the removed node is an 73 | Attr with a default value it is immediately replaced. 74 | 75 | Return Value: The node removed from the map or undef if no node with 76 | such a name exists. 77 | 78 | DOMException: 79 | 80 | =over 4 81 | 82 | =item * NOT_FOUND_ERR 83 | 84 | Raised if there is no node named name in the map. 85 | 86 | =back 87 | 88 | =item item (index) 89 | 90 | Returns the indexth item in the map. If index is greater than 91 | or equal to the number of nodes in the map, this returns undef. 
92 | 93 | Return Value: The node at the indexth position in the NamedNodeMap, or 94 | undef if that is not a valid index. 95 | 96 | =item getLength 97 | 98 | Returns the number of nodes in the map. The range of valid child node 99 | indices is 0 to length-1 inclusive. 100 | 101 | =back 102 | 103 | =head2 Additional methods not in the DOM Spec 104 | 105 | =over 4 106 | 107 | =item getValues 108 | 109 | Returns a NodeList with the nodes contained in the NamedNodeMap. 110 | The NodeList is "live", in that it reflects changes made to the NamedNodeMap. 111 | 112 | When this method is called in a list context, it returns a regular perl list 113 | containing the values. Note that this list is not "live". E.g. 114 | 115 | @list = $map->getValues; # returns a perl list 116 | $nodelist = $map->getValues; # returns a NodeList (object ref.) 117 | for my $val ($map->getValues) # iterate over the values 118 | 119 | =item getChildIndex (node) 120 | 121 | Returns the index of the node in the NodeList as returned by getValues, or -1 122 | if the node is not in the NamedNodeMap. 123 | 124 | =item dispose 125 | 126 | Removes all circular references in this NamedNodeMap and its descendants so the 127 | objects can be claimed for garbage collection. The objects should not be used 128 | afterwards. 129 | 130 | =back 131 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Node.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Node - Super class of all nodes in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Node is the super class of all nodes in an XML::DOM document. 8 | This means that all nodes that subclass XML::DOM::Node also inherit all 9 | the methods that XML::DOM::Node implements. 10 | 11 | =head2 GLOBAL VARIABLES 12 | 13 | =over 4 14 | 15 | =item @NodeNames 16 | 17 | The variable @XML::DOM::Node::NodeNames maps the node type constants to strings. 
18 | It is used by XML::DOM::Node::getNodeTypeName. 19 | 20 | =back 21 | 22 | =head2 METHODS 23 | 24 | =over 4 25 | 26 | =item getNodeType 27 | 28 | Return an integer indicating the node type. See XML::DOM constants. 29 | 30 | =item getNodeName 31 | 32 | Return a property or a hardcoded string, depending on the node type. 33 | Here are the corresponding functions or values: 34 | 35 | Attr getName 36 | AttDef getName 37 | AttlistDecl getName 38 | CDATASection "#cdata-section" 39 | Comment "#comment" 40 | Document "#document" 41 | DocumentType getNodeName 42 | DocumentFragment "#document-fragment" 43 | Element getTagName 44 | ElementDecl getName 45 | EntityReference getEntityName 46 | Entity getNotationName 47 | Notation getName 48 | ProcessingInstruction getTarget 49 | Text "#text" 50 | XMLDecl "#xml-declaration" 51 | 52 | B: AttDef, AttlistDecl, ElementDecl and XMLDecl were added for 53 | completeness. 54 | 55 | =item getNodeValue and setNodeValue (value) 56 | 57 | Returns a string or undef, depending on the node type. This method is provided 58 | for completeness. In other languages it saves the programmer an upcast. 59 | The value is either available thru some other method defined in the subclass, or 60 | else undef is returned. Here are the corresponding methods: 61 | Attr::getValue, Text::getData, CDATASection::getData, Comment::getData, 62 | ProcessingInstruction::getData. 63 | 64 | =item getParentNode and setParentNode (parentNode) 65 | 66 | The parent of this node. All nodes, except Document, 67 | DocumentFragment, and Attr may have a parent. However, if a 68 | node has just been created and not yet added to the tree, or 69 | if it has been removed from the tree, this is undef. 70 | 71 | =item getChildNodes 72 | 73 | A NodeList that contains all children of this node. If there 74 | are no children, this is a NodeList containing no nodes. 
The 75 | content of the returned NodeList is "live" in the sense that, 76 | for instance, changes to the children of the node object that 77 | it was created from are immediately reflected in the nodes 78 | returned by the NodeList accessors; it is not a static 79 | snapshot of the content of the node. This is true for every 80 | NodeList, including the ones returned by the 81 | getElementsByTagName method. 82 | 83 | NOTE: this implementation does not return a "live" NodeList for 84 | getElementsByTagName. See L. 85 | 86 | When this method is called in a list context, it returns a regular perl list 87 | containing the child nodes. Note that this list is not "live". E.g. 88 | 89 | @list = $node->getChildNodes; # returns a perl list 90 | $nodelist = $node->getChildNodes; # returns a NodeList (object reference) 91 | for my $kid ($node->getChildNodes) # iterate over the children of $node 92 | 93 | =item getFirstChild 94 | 95 | The first child of this node. If there is no such node, this returns undef. 96 | 97 | =item getLastChild 98 | 99 | The last child of this node. If there is no such node, this returns undef. 100 | 101 | =item getPreviousSibling 102 | 103 | The node immediately preceding this node. If there is no such 104 | node, this returns undef. 105 | 106 | =item getNextSibling 107 | 108 | The node immediately following this node. If there is no such node, this returns 109 | undef. 110 | 111 | =item getAttributes 112 | 113 | A NamedNodeMap containing the attributes (Attr nodes) of this node 114 | (if it is an Element) or undef otherwise. 115 | Note that adding/removing attributes from the returned object, also adds/removes 116 | attributes from the Element node that the NamedNodeMap came from. 117 | 118 | =item getOwnerDocument 119 | 120 | The Document object associated with this node. This is also 121 | the Document object used to create new nodes. When this node 122 | is a Document this is undef. 
123 | 124 | =item insertBefore (newChild, refChild) 125 | 126 | Inserts the node newChild before the existing child node 127 | refChild. If refChild is undef, insert newChild at the end of 128 | the list of children. 129 | 130 | If newChild is a DocumentFragment object, all of its children 131 | are inserted, in the same order, before refChild. If the 132 | newChild is already in the tree, it is first removed. 133 | 134 | Return Value: The node being inserted. 135 | 136 | DOMExceptions: 137 | 138 | =over 4 139 | 140 | =item * HIERARCHY_REQUEST_ERR 141 | 142 | Raised if this node is of a type that does not allow children of the type of 143 | the newChild node, or if the node to insert is one of this node's ancestors. 144 | 145 | =item * WRONG_DOCUMENT_ERR 146 | 147 | Raised if newChild was created from a different document than the one that 148 | created this node. 149 | 150 | =item * NO_MODIFICATION_ALLOWED_ERR 151 | 152 | Raised if this node is readonly. 153 | 154 | =item * NOT_FOUND_ERR 155 | 156 | Raised if refChild is not a child of this node. 157 | 158 | =back 159 | 160 | =item replaceChild (newChild, oldChild) 161 | 162 | Replaces the child node oldChild with newChild in the list of 163 | children, and returns the oldChild node. If the newChild is 164 | already in the tree, it is first removed. 165 | 166 | Return Value: The node replaced. 167 | 168 | DOMExceptions: 169 | 170 | =over 4 171 | 172 | =item * HIERARCHY_REQUEST_ERR 173 | 174 | Raised if this node is of a type that does not allow children of the type of 175 | the newChild node, or if the node to put in is one of this node's ancestors. 176 | 177 | =item * WRONG_DOCUMENT_ERR 178 | 179 | Raised if newChild was created from a different document than the one that 180 | created this node. 181 | 182 | =item * NO_MODIFICATION_ALLOWED_ERR 183 | 184 | Raised if this node is readonly. 185 | 186 | =item * NOT_FOUND_ERR 187 | 188 | Raised if oldChild is not a child of this node.
189 | 190 | =back 191 | 192 | =item removeChild (oldChild) 193 | 194 | Removes the child node indicated by oldChild from the list of 195 | children, and returns it. 196 | 197 | Return Value: The node removed. 198 | 199 | DOMExceptions: 200 | 201 | =over 4 202 | 203 | =item * NO_MODIFICATION_ALLOWED_ERR 204 | 205 | Raised if this node is readonly. 206 | 207 | =item * NOT_FOUND_ERR 208 | 209 | Raised if oldChild is not a child of this node. 210 | 211 | =back 212 | 213 | =item appendChild (newChild) 214 | 215 | Adds the node newChild to the end of the list of children of 216 | this node. If the newChild is already in the tree, it is 217 | first removed. If it is a DocumentFragment object, the entire contents of 218 | the document fragment are moved into the child list of this node 219 | 220 | Return Value: The node added. 221 | 222 | DOMExceptions: 223 | 224 | =over 4 225 | 226 | =item * HIERARCHY_REQUEST_ERR 227 | 228 | Raised if this node is of a type that does not allow children of the type of 229 | the newChild node, or if the node to append is one of this node's ancestors. 230 | 231 | =item * WRONG_DOCUMENT_ERR 232 | 233 | Raised if newChild was created from a different document than the one that 234 | created this node. 235 | 236 | =item * NO_MODIFICATION_ALLOWED_ERR 237 | 238 | Raised if this node is readonly. 239 | 240 | =back 241 | 242 | =item hasChildNodes 243 | 244 | This is a convenience method to allow easy determination of 245 | whether a node has any children. 246 | 247 | Return Value: 1 if the node has any children, 0 otherwise. 248 | 249 | =item cloneNode (deep) 250 | 251 | Returns a duplicate of this node, i.e., serves as a generic 252 | copy constructor for nodes. The duplicate node has no parent 253 | (parentNode returns undef.). 
254 | 255 | Cloning an Element copies all attributes and their values, 256 | including those generated by the XML processor to represent 257 | defaulted attributes, but this method does not copy any text 258 | it contains unless it is a deep clone, since the text is 259 | contained in a child Text node. Cloning any other type of 260 | node simply returns a copy of this node. 261 | 262 | Parameters: 263 | I<deep> If true, recursively clone the subtree under the specified node. 264 | If false, clone only the node itself (and its attributes, if it is an Element). 265 | 266 | Return Value: The duplicate node. 267 | 268 | =item normalize 269 | 270 | Puts all Text nodes in the full depth of the sub-tree 271 | underneath this Element into a "normal" form where only 272 | markup (e.g., tags, comments, processing instructions, CDATA 273 | sections, and entity references) separates Text nodes, i.e., 274 | there are no adjacent Text nodes. This can be used to ensure 275 | that the DOM view of a document is the same as if it were 276 | saved and re-loaded, and is useful when operations (such as 277 | XPointer lookups) that depend on a particular document tree 278 | structure are to be used. 279 | 280 | B<NOTE>: In the DOM Spec this method is defined in the Element and 281 | Document class interfaces only, but it doesn't hurt to have it here... 282 | 283 | =item getElementsByTagName (name [, recurse]) 284 | 285 | Returns a NodeList of all descendant elements with a given 286 | tag name, in the order in which they would be encountered in 287 | a preorder traversal of the Element tree. 288 | 289 | Parameters: 290 | I<name> The name of the tag to match on. The special value "*" matches all tags. 291 | I<recurse> Whether it should return only direct child nodes (0) or any descendant that matches the tag name (1). This argument is optional and defaults to 1. It is not part of the DOM spec. 292 | 293 | Return Value: A list of matching Element nodes.
294 | 295 | NOTE: this implementation does not return a "live" NodeList for 296 | getElementsByTagName. See L<XML::DOM/CAVEATS>. 297 | 298 | When this method is called in a list context, it returns a regular perl list 299 | containing the result nodes. E.g. 300 | 301 | @list = $node->getElementsByTagName("tag"); # returns a perl list 302 | $nodelist = $node->getElementsByTagName("tag"); # returns a NodeList (object ref.) 303 | for my $elem ($node->getElementsByTagName("tag")) # iterate over the result nodes 304 | 305 | =back 306 | 307 | =head2 Additional methods not in the DOM Spec 308 | 309 | =over 4 310 | 311 | =item getNodeTypeName 312 | 313 | Return the string describing the node type. 314 | E.g. returns "ELEMENT_NODE" if getNodeType returns ELEMENT_NODE. 315 | It uses @XML::DOM::Node::NodeNames. 316 | 317 | =item toString 318 | 319 | Returns the entire subtree as a string. 320 | 321 | =item printToFile (filename) 322 | 323 | Prints the entire subtree to the file with the specified filename. 324 | 325 | Croaks: if the file could not be opened for writing. 326 | 327 | =item printToFileHandle (handle) 328 | 329 | Prints the entire subtree to the file handle. 330 | E.g. to print to STDOUT: 331 | 332 | $node->printToFileHandle (\*STDOUT); 333 | 334 | =item print (obj) 335 | 336 | Prints the entire subtree using the object's print method. E.g. to print to a 337 | FileHandle object: 338 | 339 | $f = new FileHandle ("file.out", "w"); 340 | $node->print ($f); 341 | 342 | =item getChildIndex (child) 343 | 344 | Returns the index of the child node in the list returned by getChildNodes. 345 | 346 | Return Value: the index or -1 if the node is not found. 347 | 348 | =item getChildAtIndex (index) 349 | 350 | Returns the child node at the specified index or undef. 351 | 352 | =item addText (text) 353 | 354 | Appends the specified string to the last child if it is a Text node, or else 355 | appends a new Text node (with the specified text.)
356 | 357 | Return Value: the last child if it was a Text node or else the new Text node. 358 | 359 | =item dispose 360 | 361 | Removes all circular references in this node and its descendants so the 362 | objects can be claimed for garbage collection. The objects should not be used 363 | afterwards. 364 | 365 | =item setOwnerDocument (doc) 366 | 367 | Sets the ownerDocument property of this node and all its children (and 368 | attributes etc.) to the specified document. 369 | This allows the user to cut and paste document subtrees between different 370 | XML::DOM::Documents. The node should be removed from the original document 371 | first, before calling setOwnerDocument. 372 | 373 | This method does nothing when called on a Document node. 374 | 375 | =item isAncestor (parent) 376 | 377 | Returns 1 if parent is an ancestor of this node or if it is this node itself. 378 | 379 | =item expandEntityRefs (str) 380 | 381 | Expands all the entity references in the string and returns the result. 382 | The entity references can be character references (e.g. "&#123;" or "&#x1fc2;"), 383 | default entity references ("&quot;", "&gt;", "&lt;", "&apos;" and "&amp;") or 384 | entity references defined in Entity objects as part of the DocumentType of 385 | the owning Document. Character references are expanded into UTF-8. 386 | Parameter entity references (e.g. %ent;) are not expanded. 387 | 388 | =item to_sax ( %HANDLERS ) 389 | 390 | E.g. 391 | 392 | $node->to_sax (DocumentHandler => $my_handler, 393 | Handler => $handler2 ); 394 | 395 | %HANDLERS may contain the following handlers: 396 | 397 | =over 4 398 | 399 | =item * DocumentHandler 400 | 401 | =item * DTDHandler 402 | 403 | =item * EntityResolver 404 | 405 | =item * Handler 406 | 407 | Default handler when one of the above is not specified 408 | 409 | =back 410 | 411 | Each XML::DOM::Node generates the appropriate SAX callbacks (for the 412 | appropriate SAX handler.) Different SAX handlers can be plugged in to 413 | accomplish different things, e.g.
L<XML::Checker> would check the node 414 | (currently only Document and Element nodes are supported), L<XML::Handler::BuildDOM> 415 | would create a new DOM subtree (thereby, in essence, copying the Node) 416 | and in the near future, XML::Writer could print the node. 417 | All Perl SAX related work is still in flux, so this interface may change a 418 | little. 419 | 420 | See PerlSAX for the description of the SAX interface. 421 | 422 | =item check ( [$checker] ) 423 | 424 | See descriptions for check() in L<XML::DOM::Document> and L<XML::DOM::Element>. 425 | 426 | =item xql ( @XQL_OPTIONS ) 427 | 428 | To use the xql method, you must first I<use> L<XML::XQL> and L<XML::XQL::DOM>. 429 | This method is basically a shortcut for: 430 | 431 | $query = new XML::XQL::Query ( @XQL_OPTIONS ); 432 | return $query->solve ($node); 433 | 434 | If the first parameter in @XQL_OPTIONS is the XQL expression, you can leave off 435 | the 'Expr' keyword, so: 436 | 437 | $node->xql ("doc//elem1[@attr]", @other_options); 438 | 439 | is identical to: 440 | 441 | $node->xql (Expr => "doc//elem1[@attr]", @other_options); 442 | 443 | See L<XML::XQL::Query> for other available XQL_OPTIONS. 444 | See L<XML::XQL> and L<XML::XQL::Tutorial> for more info. 445 | 446 | =item isHidden () 447 | 448 | Whether the node is hidden. 449 | See L<XML::DOM/Hidden Nodes> for details.
450 | 451 | =back 452 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/NodeList.pm: -------------------------------------------------------------------------------- 1 | ###################################################################### 2 | package XML::DOM::NodeList; 3 | ###################################################################### 4 | # A NodeList object is a blessed array reference holding the nodes in order. 5 | use vars qw ( $EMPTY ); 6 | 7 | # Empty NodeList 8 | $EMPTY = new XML::DOM::NodeList; 9 | # Constructor: returns a new, empty list blessed into the class given in $_[0]. 10 | sub new 11 | { 12 | bless [], $_[0]; 13 | } 14 | # Returns the element stored at index $_[1] of the underlying array. 15 | sub item 16 | { 17 | $_[0]->[$_[1]]; 18 | } 19 | # Returns the number of elements in the underlying array. 20 | sub getLength 21 | { 22 | int (@{$_[0]}); 23 | } 24 | 25 | #------------------------------------------------------------ 26 | # Extra method implementations 27 | # Calls dispose on every node in the list. 28 | sub dispose 29 | { 30 | my $self = shift; 31 | for my $kid (@{$self}) 32 | { 33 | $kid->dispose; 34 | } 35 | } 36 | # Calls setOwnerDocument ($doc) on every node in the list. 37 | sub setOwnerDocument 38 | { 39 | my ($self, $doc) = @_; 40 | for my $kid (@{$self}) 41 | { 42 | $kid->setOwnerDocument ($doc); 43 | } 44 | } 45 | 46 | 1; # package return code 47 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/NodeList.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::NodeList - A node list as used by XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | The NodeList interface provides the abstraction of an ordered 8 | collection of nodes, without defining or constraining how this 9 | collection is implemented. 10 | 11 | The items in the NodeList are accessible via an integral index, 12 | starting from 0. 13 | 14 | Although the DOM spec states that all NodeLists are "live" in that they 15 | always reflect changes to the DOM tree, the NodeList returned by 16 | getElementsByTagName is not live in this implementation. See L<XML::DOM/CAVEATS> 17 | for details.
18 | 19 | =head2 METHODS 20 | 21 | =over 4 22 | 23 | =item item (index) 24 | 25 | Returns the indexth item in the collection. If index is 26 | greater than or equal to the number of nodes in the list, 27 | this returns undef. 28 | 29 | =item getLength 30 | 31 | The number of nodes in the list. The range of valid child 32 | node indices is 0 to length-1 inclusive. 33 | 34 | =back 35 | 36 | =head2 Additional methods not in the DOM Spec 37 | 38 | =over 4 39 | 40 | =item dispose 41 | 42 | Removes all circular references in this NodeList and its descendants so the 43 | objects can be claimed for garbage collection. The objects should not be used 44 | afterwards. 45 | 46 | =back 47 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Notation.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Notation - An XML NOTATION in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Notation extends L. 8 | 9 | This node represents a Notation, e.g. 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | =head2 METHODS 20 | 21 | =over 4 22 | 23 | =item getName and setName (name) 24 | 25 | Returns (or sets) the Notation name, which is the first token after the 26 | NOTATION keyword. 27 | 28 | =item getSysId and setSysId (sysId) 29 | 30 | Returns (or sets) the system ID, which is the token after the optional 31 | SYSTEM keyword. 32 | 33 | =item getPubId and setPubId (pubId) 34 | 35 | Returns (or sets) the public ID, which is the token after the optional 36 | PUBLIC keyword. 37 | 38 | =item getBase 39 | 40 | This is passed by XML::Parser in the Notation handler. 41 | I don't know what it is yet. 42 | 43 | =item getNodeName 44 | 45 | Returns the same as getName. 
46 | 47 | =back 48 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Parser.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Parser - An XML::Parser that builds XML::DOM document structures 4 | 5 | =head1 SYNOPSIS 6 | 7 | use XML::DOM; 8 | 9 | my $parser = new XML::DOM::Parser; 10 | my $doc = $parser->parsefile ("file.xml"); 11 | $doc->dispose; # Avoid memory leaks - cleanup circular references 12 | 13 | =head1 DESCRIPTION 14 | 15 | XML::DOM::Parser extends L 16 | 17 | The XML::Parser module was written by Clark Cooper and 18 | is built on top of XML::Parser::Expat, 19 | which is a lower level interface to James Clark's expat library. 20 | 21 | XML::DOM::Parser parses XML strings or files 22 | and builds a data structure that conforms to the API of the Document Object 23 | Model as described at L. 24 | See the L manpage for other additional properties of the 25 | XML::DOM::Parser class. 26 | Note that the 'Style' property should not be used (it is set internally.) 27 | 28 | The XML::Parser B option is more or less supported, in that it will 29 | generate EntityReference objects whenever an entity reference is encountered 30 | in character data. I'm not sure how useful this is. Any comments are welcome. 31 | 32 | As described in the synopsis, when you create an XML::DOM::Parser object, 33 | the parse and parsefile methods create an L object 34 | from the specified input. This Document object can then be examined, modified and 35 | written back out to a file or converted to a string. 36 | 37 | When using XML::DOM with XML::Parser version 2.19 and up, setting the 38 | XML::DOM::Parser option B to 1 will store CDATASections in 39 | CDATASection nodes, instead of converting them to Text nodes. 40 | Subsequent CDATASection nodes will be merged into one. Let me know if this 41 | is a problem. 
42 | 43 | =head1 Using LWP to parse URLs 44 | 45 | The parsefile() method now also supports URLs, e.g. I. 46 | It uses LWP to download the file and then calls parse() on the resulting string. 47 | By default it will use a L<LWP::UserAgent> that is created as follows: 48 | 49 | use LWP::UserAgent; 50 | $LWP_USER_AGENT = LWP::UserAgent->new; 51 | $LWP_USER_AGENT->env_proxy; 52 | 53 | Note that env_proxy reads proxy settings from environment variables, which is what I need to 54 | do to get thru our firewall. If you want to use a different LWP::UserAgent, you can either set 55 | it globally with: 56 | 57 | XML::DOM::Parser::set_LWP_UserAgent ($my_agent); 58 | 59 | or, you can specify it for a specific XML::DOM::Parser by passing it to the constructor: 60 | 61 | my $parser = new XML::DOM::Parser (LWP_UserAgent => $my_agent); 62 | 63 | Currently, LWP is used when the filename (passed to parsefile) starts with one of 64 | the following URL schemes: http, https, ftp, wais, gopher, or file (followed by a colon.) 65 | If I missed one, please let me know. 66 | 67 | The LWP modules are part of libwww-perl which is available at CPAN. 68 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/PerlSAX.pm: -------------------------------------------------------------------------------- 1 | package XML::DOM::PerlSAX; 2 | use strict; 3 | # Emit a rename notice while this module is being compiled, but only when warnings ($^W) are on. 4 | BEGIN 5 | { 6 | if ($^W) 7 | { 8 | warn "XML::DOM::PerlSAX has been renamed to XML::Handler::BuildDOM, please modify your code accordingly."; 9 | } 10 | } 11 | # Compatibility shim: this package defines no methods of its own and inherits everything from XML::Handler::BuildDOM. 12 | use XML::Handler::BuildDOM; 13 | use vars qw{ @ISA }; 14 | @ISA = qw{ XML::Handler::BuildDOM }; 15 | 16 | 1; # package return code 17 | 18 | __END__ 19 | 20 | =head1 NAME 21 | 22 | XML::DOM::PerlSAX - Old name of L<XML::Handler::BuildDOM> 23 | 24 | =head1 SYNOPSIS 25 | 26 | See L<XML::Handler::BuildDOM> 27 | 28 | =head1 DESCRIPTION 29 | 30 | XML::DOM::PerlSAX was renamed to L<XML::Handler::BuildDOM> to comply 31 | with naming conventions for PerlSAX filters/handlers.
32 | 33 | For backward compatibility, this package will remain in existence 34 | (it simply includes XML::Handler::BuildDOM), but it will print a warning when 35 | running with I<'perl -w'>. 36 | 37 | =head1 AUTHOR 38 | 39 | Enno Derksen is the original author. 40 | 41 | Send bug reports, hints, tips, suggestions to T.J Mather at 42 | >. 43 | 44 | =head1 SEE ALSO 45 | 46 | L, L 47 | 48 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/ProcessingInstruction.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::ProcessingInstruction - An XML processing instruction in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::ProcessingInstruction extends L. 8 | 9 | It represents a "processing instruction", used in XML as a way to keep 10 | processor-specific information in the text of the document. An example: 11 | 12 | 13 | 14 | Here, "PI" is the target and "processing instruction" is the data. 15 | 16 | =head2 METHODS 17 | 18 | =over 4 19 | 20 | =item getTarget 21 | 22 | The target of this processing instruction. XML defines this 23 | as being the first token following the markup that begins the 24 | processing instruction. 25 | 26 | =item getData and setData (data) 27 | 28 | The content of this processing instruction. This is from the 29 | first non white space character after the target to the 30 | character immediately preceding the ?>. 31 | 32 | =back 33 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/Text.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::Text - A piece of XML text in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::Text extends L, which extends 8 | L. 9 | 10 | The Text interface represents the textual content (termed character 11 | data in XML) of an Element or Attr. 
If there is no markup inside an 12 | element's content, the text is contained in a single object 13 | implementing the Text interface that is the only child of the element. 14 | If there is markup, it is parsed into a list of elements and Text nodes 15 | that form the list of children of the element. 16 | 17 | When a document is first made available via the DOM, there is only one 18 | Text node for each block of text. Users may create adjacent Text nodes 19 | that represent the contents of a given element without any intervening 20 | markup, but should be aware that there is no way to represent the 21 | separations between these nodes in XML or HTML, so they will not (in 22 | general) persist between DOM editing sessions. The normalize() method 23 | on Element merges any such adjacent Text objects into a single node for 24 | each block of text; this is recommended before employing operations 25 | that depend on a particular document structure, such as navigation with 26 | XPointers. 27 | 28 | =head2 METHODS 29 | 30 | =over 4 31 | 32 | =item splitText (offset) 33 | 34 | Breaks this Text node into two Text nodes at the specified 35 | offset, keeping both in the tree as siblings. This node then 36 | only contains all the content up to the offset point. And a 37 | new Text node, which is inserted as the next sibling of this 38 | node, contains all the content at and after the offset point. 39 | 40 | Parameters: 41 | I The offset at which to split, starting from 0. 42 | 43 | Return Value: The new Text node. 44 | 45 | DOMExceptions: 46 | 47 | =over 4 48 | 49 | =item * INDEX_SIZE_ERR 50 | 51 | Raised if the specified offset is negative or greater than the number of 52 | characters in data. 53 | 54 | =item * NO_MODIFICATION_ALLOWED_ERR 55 | 56 | Raised if this node is readonly. 
57 | 58 | =back 59 | 60 | =back 61 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/DOM/XMLDecl.pod: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | XML::DOM::XMLDecl - XML declaration in XML::DOM 4 | 5 | =head1 DESCRIPTION 6 | 7 | XML::DOM::XMLDecl extends L, but is not part of the DOM Level 1 8 | specification. 9 | 10 | It contains the XML declaration, e.g. 11 | 12 | 13 | 14 | See also XML::DOM::Document::getXMLDecl. 15 | 16 | =head2 METHODS 17 | 18 | =over 4 19 | 20 | =item getVersion and setVersion (version) 21 | 22 | Returns and sets the XML version. At the time of this writing the version should 23 | always be "1.0" 24 | 25 | =item getEncoding and setEncoding (encoding) 26 | 27 | undef may be specified for the encoding value. 28 | 29 | =item getStandalone and setStandalone (standalone) 30 | 31 | undef may be specified for the standalone value. 32 | 33 | =back 34 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/Handler/BuildDOM.pm: -------------------------------------------------------------------------------- 1 | package XML::Handler::BuildDOM; 2 | use strict; 3 | use XML::DOM; 4 | 5 | # 6 | # TODO: 7 | # - add support for parameter entity references 8 | # - expand API: insert Elements in the tree or stuff into DocType etc. 9 | # Constructor: the option hash passed by the caller is blessed and becomes the handler object itself. 10 | sub new 11 | { 12 | my ($class, %args) = @_; 13 | bless \%args, $class; 14 | } 15 | 16 | #-------- PerlSAX Handler methods ------------------------------ 17 | # Resolves the Document, Element and DocType slots and resets the per-document parse state. 18 | sub start_document # was Init 19 | { 20 | my $self = shift; 21 | 22 | # Define Document if it's not set & not obtainable from Element or DocType 23 | $self->{Document} ||= 24 | (defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef) 25 | || (defined $self->{DocType} ?
$self->{DocType}->getOwnerDocument : undef) 26 | || new XML::DOM::Document(); 27 | 28 | $self->{Element} ||= $self->{Document}; 29 | 30 | unless (defined $self->{DocType}) 31 | { 32 | $self->{DocType} = $self->{Document}->getDoctype 33 | if defined $self->{Document}; 34 | # Create a new DocumentType only when the Document did not supply one 35 | unless (defined $self->{DocType}) 36 | { 37 | #?? should be $doc->createDocType for extensibility! 38 | $self->{DocType} = new XML::DOM::DocumentType ($self->{Document}); 39 | $self->{Document}->setDoctype ($self->{DocType}); 40 | } 41 | } 42 | 43 | # Prepare for document prolog 44 | $self->{InProlog} = 1; 45 | 46 | # We haven't passed the root element yet 47 | $self->{EndDoc} = 0; 48 | 49 | undef $self->{LastText}; 50 | } 51 | # Removes and disposes the DocumentType if no doctype_decl event was seen, then returns the Document. 52 | sub end_document # was Final 53 | { 54 | my $self = shift; 55 | unless ($self->{SawDocType}) 56 | { 57 | my $doctype = $self->{Document}->removeDoctype; 58 | $doctype->dispose; 59 | #?? do we always want to destroy the Doctype? 60 | } 61 | $self->{Document}; 62 | } 63 | # Adds character data to the current Element, either as CDATA (inside a CDATA section with KeepCDATA set) or as text. 64 | sub characters # was Char 65 | { 66 | my $self = $_[0]; 67 | my $str = $_[1]->{Data}; 68 | 69 | if ($self->{InCDATA} && $self->{KeepCDATA}) 70 | { 71 | undef $self->{LastText}; 72 | # Merge text with previous node if possible 73 | $self->{Element}->addCDATA ($str); 74 | } 75 | else 76 | { 77 | # Merge text with previous node if possible 78 | # Used to be: $expat->{DOM_Element}->addText ($str); 79 | if ($self->{LastText}) 80 | { 81 | $self->{LastText}->appendData ($str); 82 | } 83 | else 84 | { 85 | $self->{LastText} = $self->{Document}->createTextNode ($str); 86 | $self->{Element}->appendChild ($self->{LastText}); 87 | } 88 | } 89 | } 90 | # Creates an Element under the current insertion point, makes it the new insertion point and attaches its attributes. 91 | sub start_element # was Start 92 | { 93 | my ($self, $hash) = @_; 94 | my $elem = $hash->{Name}; 95 | my $attr = $hash->{Attributes}; 96 | 97 | my $parent = $self->{Element}; 98 | my $doc = $self->{Document}; 99 | 100 | if ($parent == $doc) 101 | { 102 | # End of document prolog, i.e.
start of first Element 103 | $self->{InProlog} = 0; 104 | } 105 | 106 | undef $self->{LastText}; 107 | my $node = $doc->createElement ($elem); 108 | $self->{Element} = $node; 109 | $parent->appendChild ($node); 110 | 111 | my $i = 0; 112 | my $n = scalar keys %$attr; 113 | return unless $n; 114 | 115 | if (exists $hash->{AttributeOrder}) 116 | { 117 | my $defaulted = $hash->{Defaulted}; 118 | my @order = @{ $hash->{AttributeOrder} }; 119 | 120 | # Specified attributes 121 | for (my $i = 0; $i < $defaulted; $i++) 122 | { 123 | my $a = $order[$i]; 124 | my $att = $doc->createAttribute ($a, $attr->{$a}, 1); 125 | $node->setAttributeNode ($att); 126 | } 127 | 128 | # Defaulted attributes (created under the attribute's own name, with the last argument 0) 129 | for (my $i = $defaulted; $i < @order; $i++) 130 | { 131 | my $a = $order[$i]; 132 | my $att = $doc->createAttribute ($a, $attr->{$a}, 0); 133 | $node->setAttributeNode ($att); 134 | } 135 | } 136 | else 137 | { 138 | # We're assuming that all attributes were specified (1) 139 | for my $a (keys %$attr) 140 | { 141 | my $att = $doc->createAttribute ($a, $attr->{$a}, 1); 142 | $node->setAttributeNode ($att); 143 | } 144 | } 145 | } 146 | # Moves the insertion point back to the parent of the current Element. 147 | sub end_element 148 | { 149 | my $self = shift; 150 | $self->{Element} = $self->{Element}->getParentNode; 151 | undef $self->{LastText}; 152 | 153 | # Check for end of root element 154 | $self->{EndDoc} = 1 if ($self->{Element} == $self->{Document}); 155 | } 156 | # Appends an EntityReference node for the named entity to the current Element. 157 | sub entity_reference # was Default 158 | { 159 | my $self = $_[0]; 160 | my $name = $_[1]->{Name}; 161 | 162 | $self->{Element}->appendChild ( 163 | $self->{Document}->createEntityReference ($name)); 164 | undef $self->{LastText}; 165 | } 166 | # Sets the InCDATA flag that characters() checks. 167 | sub start_cdata 168 | { 169 | my $self = shift; 170 | $self->{InCDATA} = 1; 171 | } 172 | # Clears the InCDATA flag. 173 | sub end_cdata 174 | { 175 | my $self = shift; 176 | $self->{InCDATA} = 0; 177 | } 178 | # Appends a Comment node to the current Element. 179 | sub comment 180 | { 181 | my $self = $_[0]; 182 | 183 | local $XML::DOM::IgnoreReadOnly = 1; 184 | 185 | undef $self->{LastText}; 186 | my
$comment = $self->{Document}->createComment ($_[1]->{Data}); 187 | $self->{Element}->appendChild ($comment); 188 | } 189 | # Stores the doctype parameters and records that a doctype declaration was seen (checked by end_document). 190 | sub doctype_decl 191 | { 192 | my ($self, $hash) = @_; 193 | 194 | $self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId}, 195 | $hash->{PublicId}, $hash->{Internal}); 196 | $self->{SawDocType} = 1; 197 | } 198 | # Adds an attribute definition to the DocType. 199 | sub attlist_decl 200 | { 201 | my ($self, $hash) = @_; 202 | 203 | local $XML::DOM::IgnoreReadOnly = 1; 204 | 205 | $self->{DocType}->addAttDef ($hash->{ElementName}, 206 | $hash->{AttributeName}, 207 | $hash->{Type}, 208 | $hash->{Default}, 209 | $hash->{Fixed}); 210 | } 211 | # Sets the Document's XML declaration from the Version, Encoding and Standalone values. 212 | sub xml_decl 213 | { 214 | my ($self, $hash) = @_; 215 | 216 | local $XML::DOM::IgnoreReadOnly = 1; 217 | 218 | undef $self->{LastText}; 219 | $self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document}, 220 | $hash->{Version}, 221 | $hash->{Encoding}, 222 | $hash->{Standalone})); 223 | } 224 | # Adds an entity declaration to the DocType; it is always registered as a non-parameter entity. 225 | sub entity_decl 226 | { 227 | my ($self, $hash) = @_; 228 | 229 | local $XML::DOM::IgnoreReadOnly = 1; 230 | 231 | # Parameter Entities names are passed starting with '%' 232 | my $parameter = 0; 233 | 234 | #?? parameter entities currently not supported by PerlSAX!
235 | 236 | undef $self->{LastText}; 237 | $self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value}, 238 | $hash->{SystemId}, $hash->{PublicId}, 239 | $hash->{Notation}); 240 | } 241 | 242 | # Unparsed is called when it encounters e.g: 243 | # 244 | # 245 | # 246 | sub unparsed_decl 247 | { 248 | my ($self, $hash) = @_; 249 | 250 | local $XML::DOM::IgnoreReadOnly = 1; 251 | 252 | # same as regular ENTITY, as far as DOM is concerned 253 | $self->entity_decl ($hash); 254 | } 255 | # Adds an element declaration (name and content model) to the DocType. 256 | sub element_decl 257 | { 258 | my ($self, $hash) = @_; 259 | 260 | local $XML::DOM::IgnoreReadOnly = 1; 261 | 262 | undef $self->{LastText}; 263 | $self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model}); 264 | } 265 | # Adds a notation declaration to the DocType. 266 | sub notation_decl 267 | { 268 | my ($self, $hash) = @_; 269 | 270 | local $XML::DOM::IgnoreReadOnly = 1; 271 | 272 | undef $self->{LastText}; 273 | $self->{DocType}->addNotation ($hash->{Name}, $hash->{Base}, 274 | $hash->{SystemId}, $hash->{PublicId}); 275 | } 276 | # Appends a ProcessingInstruction node to the current Element. 277 | sub processing_instruction 278 | { 279 | my ($self, $hash) = @_; 280 | 281 | local $XML::DOM::IgnoreReadOnly = 1; 282 | 283 | undef $self->{LastText}; 284 | $self->{Element}->appendChild (new XML::DOM::ProcessingInstruction 285 | ($self->{Document}, $hash->{Target}, $hash->{Data})); 286 | } 287 | 288 | return 1; 289 | 290 | __END__ 291 | 292 | =head1 NAME 293 | 294 | XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures 295 | 296 | =head1 SYNOPSIS 297 | 298 | use XML::Handler::BuildDOM; 299 | use XML::Parser::PerlSAX; 300 | 301 | my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1); 302 | my $parser = new XML::Parser::PerlSAX (Handler => $handler); 303 | 304 | my $doc = $parser->parsefile ("file.xml"); 305 | 306 | =head1 DESCRIPTION 307 | 308 | XML::Handler::BuildDOM creates L<XML::DOM> document structures 309 | (i.e. L<XML::DOM::Document>) from PerlSAX events. 310 | 311 | This class used to be called L<XML::DOM::PerlSAX> prior to libxml-enno 1.0.1.
312 | 313 | =head2 CONSTRUCTOR OPTIONS 314 | 315 | The XML::Handler::BuildDOM constructor supports the following options: 316 | 317 | =over 4 318 | 319 | =item * KeepCDATA => 1 320 | 321 | If set to 0 (default), CDATASections will be converted to regular text. 322 | 323 | =item * Document => $doc 324 | 325 | If undefined, start_document will extract it from Element or DocType (if set), 326 | otherwise it will create a new XML::DOM::Document. 327 | 328 | =item * Element => $elem 329 | 330 | If undefined, it is set to Document. This will be the insertion point (or parent) 331 | for the nodes defined by the following callbacks. 332 | 333 | =item * DocType => $doctype 334 | 335 | If undefined, start_document will extract it from Document (if possible). 336 | Otherwise it adds a new XML::DOM::DocumentType to the Document. 337 | 338 | =back 339 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/XML/RegExp.pm: -------------------------------------------------------------------------------- 1 | package XML::RegExp; 2 | 3 | use vars qw( $BaseChar $Ideographic $Letter $Digit $Extender 4 | $CombiningChar $NameChar 5 | $EntityRef $CharRef $Reference 6 | $Name $NmToken $AttValue 7 | $NCNameChar $NCName $Prefix $LocalPart $QName 8 | $VERSION ); 9 | 10 | $VERSION = '0.04'; 11 | 12 | $BaseChar = 
'(?:[a-zA-Z]|\xC3[\x80-\x96\x98-\xB6\xB8-\xBF]|\xC4[\x80-\xB1\xB4-\xBE]|\xC5[\x81-\x88\x8A-\xBE]|\xC6[\x80-\xBF]|\xC7[\x80-\x83\x8D-\xB0\xB4\xB5\xBA-\xBF]|\xC8[\x80-\x97]|\xC9[\x90-\xBF]|\xCA[\x80-\xA8\xBB-\xBF]|\xCB[\x80\x81]|\xCE[\x86\x88-\x8A\x8C\x8E-\xA1\xA3-\xBF]|\xCF[\x80-\x8E\x90-\x96\x9A\x9C\x9E\xA0\xA2-\xB3]|\xD0[\x81-\x8C\x8E-\xBF]|\xD1[\x80-\x8F\x91-\x9C\x9E-\xBF]|\xD2[\x80\x81\x90-\xBF]|\xD3[\x80-\x84\x87\x88\x8B\x8C\x90-\xAB\xAE-\xB5\xB8\xB9]|\xD4[\xB1-\xBF]|\xD5[\x80-\x96\x99\xA1-\xBF]|\xD6[\x80-\x86]|\xD7[\x90-\xAA\xB0-\xB2]|\xD8[\xA1-\xBA]|\xD9[\x81-\x8A\xB1-\xBF]|\xDA[\x80-\xB7\xBA-\xBE]|\xDB[\x80-\x8E\x90-\x93\x95\xA5\xA6]|\xE0(?:\xA4[\x85-\xB9\xBD]|\xA5[\x98-\xA1]|\xA6[\x85-\x8C\x8F\x90\x93-\xA8\xAA-\xB0\xB2\xB6-\xB9]|\xA7[\x9C\x9D\x9F-\xA1\xB0\xB1]|\xA8[\x85-\x8A\x8F\x90\x93-\xA8\xAA-\xB0\xB2\xB3\xB5\xB6\xB8\xB9]|\xA9[\x99-\x9C\x9E\xB2-\xB4]|\xAA[\x85-\x8B\x8D\x8F-\x91\x93-\xA8\xAA-\xB0\xB2\xB3\xB5-\xB9\xBD]|\xAB\xA0|\xAC[\x85-\x8C\x8F\x90\x93-\xA8\xAA-\xB0\xB2\xB3\xB6-\xB9\xBD]|\xAD[\x9C\x9D\x9F-\xA1]|\xAE[\x85-\x8A\x8E-\x90\x92-\x95\x99\x9A\x9C\x9E\x9F\xA3\xA4\xA8-\xAA\xAE-\xB5\xB7-\xB9]|\xB0[\x85-\x8C\x8E-\x90\x92-\xA8\xAA-\xB3\xB5-\xB9]|\xB1[\xA0\xA1]|\xB2[\x85-\x8C\x8E-\x90\x92-\xA8\xAA-\xB3\xB5-\xB9]|\xB3[\x9E\xA0\xA1]|\xB4[\x85-\x8C\x8E-\x90\x92-\xA8\xAA-\xB9]|\xB5[\xA0\xA1]|\xB8[\x81-\xAE\xB0\xB2\xB3]|\xB9[\x80-\x85]|\xBA[\x81\x82\x84\x87\x88\x8A\x8D\x94-\x97\x99-\x9F\xA1-\xA3\xA5\xA7\xAA\xAB\xAD\xAE\xB0\xB2\xB3\xBD]|\xBB[\x80-\x84]|\xBD[\x80-\x87\x89-\xA9])|\xE1(?:\x82[\xA0-\xBF]|\x83[\x80-\x85\x90-\xB6]|\x84[\x80\x82\x83\x85-\x87\x89\x8B\x8C\x8E-\x92\xBC\xBE]|\x85[\x80\x8C\x8E\x90\x94\x95\x99\x9F-\xA1\xA3\xA5\xA7\xA9\xAD\xAE\xB2\xB3\xB5]|\x86[\x9E\xA8\xAB\xAE\xAF\xB7\xB8\xBA\xBC-\xBF]|\x87[\x80-\x82\xAB\xB0\xB9]|[\xB8\xB9][\x80-\xBF]|\xBA[\x80-\x9B\xA0-\xBF]|\xBB[\x80-\xB9]|\xBC[\x80-\x95\x98-\x9D\xA0-\xBF]|\xBD[\x80-\x85\x88-\x8D\x90-\x97\x99\x9B\x9D\x9F-\xBD]|\xBE[\x80-\xB4\xB6-\xBC\xBE]|\xBF[\x82-\x84\x86-\x8C\x90-\x93\x96-\x9B\xA0-\
xAC\xB2-\xB4\xB6-\xBC])|\xE2(?:\x84[\xA6\xAA\xAB\xAE]|\x86[\x80-\x82])|\xE3(?:\x81[\x81-\xBF]|\x82[\x80-\x94\xA1-\xBF]|\x83[\x80-\xBA]|\x84[\x85-\xAC])|\xEA(?:[\xB0-\xBF][\x80-\xBF])|\xEB(?:[\x80-\xBF][\x80-\xBF])|\xEC(?:[\x80-\xBF][\x80-\xBF])|\xED(?:[\x80-\x9D][\x80-\xBF]|\x9E[\x80-\xA3]))'; 13 | 14 | $Ideographic = '(?:\xE3\x80[\x87\xA1-\xA9]|\xE4(?:[\xB8-\xBF][\x80-\xBF])|\xE5(?:[\x80-\xBF][\x80-\xBF])|\xE6(?:[\x80-\xBF][\x80-\xBF])|\xE7(?:[\x80-\xBF][\x80-\xBF])|\xE8(?:[\x80-\xBF][\x80-\xBF])|\xE9(?:[\x80-\xBD][\x80-\xBF]|\xBE[\x80-\xA5]))'; 15 | 16 | $Digit = '(?:[0-9]|\xD9[\xA0-\xA9]|\xDB[\xB0-\xB9]|\xE0(?:\xA5[\xA6-\xAF]|\xA7[\xA6-\xAF]|\xA9[\xA6-\xAF]|\xAB[\xA6-\xAF]|\xAD[\xA6-\xAF]|\xAF[\xA7-\xAF]|\xB1[\xA6-\xAF]|\xB3[\xA6-\xAF]|\xB5[\xA6-\xAF]|\xB9[\x90-\x99]|\xBB[\x90-\x99]|\xBC[\xA0-\xA9]))'; 17 | 18 | $Extender = '(?:\xC2\xB7|\xCB[\x90\x91]|\xCE\x87|\xD9\x80|\xE0(?:\xB9\x86|\xBB\x86)|\xE3(?:\x80[\x85\xB1-\xB5]|\x82[\x9D\x9E]|\x83[\xBC-\xBE]))'; 19 | 20 | $CombiningChar = '(?:\xCC[\x80-\xBF]|\xCD[\x80-\x85\xA0\xA1]|\xD2[\x83-\x86]|\xD6[\x91-\xA1\xA3-\xB9\xBB-\xBD\xBF]|\xD7[\x81\x82\x84]|\xD9[\x8B-\x92\xB0]|\xDB[\x96-\xA4\xA7\xA8\xAA-\xAD]|\xE0(?:\xA4[\x81-\x83\xBC\xBE\xBF]|\xA5[\x80-\x8D\x91-\x94\xA2\xA3]|\xA6[\x81-\x83\xBC\xBE\xBF]|\xA7[\x80-\x84\x87\x88\x8B-\x8D\x97\xA2\xA3]|\xA8[\x82\xBC\xBE\xBF]|\xA9[\x80-\x82\x87\x88\x8B-\x8D\xB0\xB1]|\xAA[\x81-\x83\xBC\xBE\xBF]|\xAB[\x80-\x85\x87-\x89\x8B-\x8D]|\xAC[\x81-\x83\xBC\xBE\xBF]|\xAD[\x80-\x83\x87\x88\x8B-\x8D\x96\x97]|\xAE[\x82\x83\xBE\xBF]|\xAF[\x80-\x82\x86-\x88\x8A-\x8D\x97]|\xB0[\x81-\x83\xBE\xBF]|\xB1[\x80-\x84\x86-\x88\x8A-\x8D\x95\x96]|\xB2[\x82\x83\xBE\xBF]|\xB3[\x80-\x84\x86-\x88\x8A-\x8D\x95\x96]|\xB4[\x82\x83\xBE\xBF]|\xB5[\x80-\x83\x86-\x88\x8A-\x8D\x97]|\xB8[\xB1\xB4-\xBA]|\xB9[\x87-\x8E]|\xBA[\xB1\xB4-\xB9\xBB\xBC]|\xBB[\x88-\x8D]|\xBC[\x98\x99\xB5\xB7\xB9\xBE\xBF]|\xBD[\xB1-\xBF]|\xBE[\x80-\x84\x86-\x8B\x90-\x95\x97\x99-\xAD\xB1-\xB7\xB9])|\xE2\x83[\x90-\x9C\xA1]|\xE3(?:\x80[\xAA-\xAF]|\x
82[\x99\x9A]))'; 21 | 22 | $Letter = "(?:$BaseChar|$Ideographic)"; 23 | $NameChar = "(?:[-._:]|$Letter|$Digit|$CombiningChar|$Extender)"; 24 | 25 | $Name = "(?:(?:[:_]|$Letter)$NameChar*)"; 26 | $NmToken = "(?:$NameChar+)"; 27 | $EntityRef = "(?:\&$Name;)"; 28 | $CharRef = "(?:\&#(?:[0-9]+|x[0-9a-fA-F]+);)"; 29 | $Reference = "(?:$EntityRef|$CharRef)"; 30 | 31 | # AttValue (XML 1.0 production [10]): either quoted form is any mix of plain characters and references. 32 | $AttValue = "(?:\"(?:[^\"&<]|$Reference)*\"|'(?:[^\'&<]|$Reference)*')"; 33 | 34 | ######################################################################### 35 | # The following definitions came from the XML Namespaces spec: 36 | ######################################################################### 37 | 38 | # Same as $NameChar without the ":" 39 | $NCNameChar = "(?:[-._]|$Letter|$Digit|$CombiningChar|$Extender)"; 40 | 41 | # Same as $Name without the colons 42 | $NCName = "(?:(?:_|$Letter)$NCNameChar*)"; 43 | 44 | $Prefix = $NCName; 45 | $LocalPart = $NCName; 46 | $QName = "(?:(?:$Prefix:)?$LocalPart)"; 47 | 48 | return 1; 49 | 50 | __END__ 51 | 52 | =head1 NAME 53 | 54 | XML::RegExp - Regular expressions for XML tokens 55 | 56 | =head1 SYNOPSIS 57 | 58 | use XML::RegExp; 59 | 60 | if ($my_name =~ /^$XML::RegExp::Name$/) 61 | { 62 | # $my_name is a valid XML 'Name' 63 | } 64 | 65 | =head1 DESCRIPTION 66 | 67 | This package contains regular expressions for the following XML tokens: 68 | BaseChar, Ideographic, Letter, Digit, Extender, CombiningChar, NameChar, 69 | EntityRef, CharRef, Reference, Name, NmToken, and AttValue. 70 | 71 | The definitions of these tokens were taken from the XML spec 72 | (Extensible Markup Language 1.0) at L<http://www.w3.org/TR/REC-xml>. 73 | 74 | Also contains the regular expressions for the following tokens from the 75 | XML Namespaces spec at L<http://www.w3.org/TR/REC-xml-names>: 76 | NCNameChar, NCName, QName, Prefix and LocalPart. 77 | 78 | =head1 AUTHOR 79 | 80 | Original Author is Enno Derksen > 81 | 82 | Please send bugs, comments and suggestions to T.J. 
Mather > 83 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/data/WordNet-2.0-Exceptions/WordNet-2.0.exc.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dqwang122/MLROUGE/24c4cb227eef91135919ab6dfbc82369310c0412/ROUGE-1.5.5/data/WordNet-2.0-Exceptions/WordNet-2.0.exc.db -------------------------------------------------------------------------------- /ROUGE-1.5.5/data/WordNet-2.0-Exceptions/adj.exc: -------------------------------------------------------------------------------- 1 | acer acer 2 | after after 3 | airier airy 4 | airiest airy 5 | all-arounder all-arounder 6 | angrier angry 7 | angriest angry 8 | archer archer 9 | artier arty 10 | artiest arty 11 | ashier ashy 12 | ashiest ashy 13 | assaulter assaulter 14 | attacker attacker 15 | backer backer 16 | baggier baggy 17 | baggiest baggy 18 | balkier balky 19 | balkiest balky 20 | balmier balmy 21 | balmiest balmy 22 | bandier bandy 23 | bandiest bandy 24 | bargainer bargainer 25 | barmier barmy 26 | barmiest barmy 27 | battier batty 28 | battiest batty 29 | baulkier baulky 30 | baulkiest baulky 31 | bawdier bawdy 32 | bawdiest bawdy 33 | bayer bayer 34 | beadier beady 35 | beadiest beady 36 | beastlier beastly 37 | beastliest beastly 38 | beater beater 39 | beefier beefy 40 | beefiest beefy 41 | beerier beery 42 | beeriest beery 43 | bendier bendy 44 | bendiest bendy 45 | best good 46 | better good well 47 | bigger big 48 | biggest big 49 | bitchier bitchy 50 | bitchiest bitchy 51 | biter biter 52 | bittier bitty 53 | bittiest bitty 54 | blearier bleary 55 | bleariest bleary 56 | bloodier bloody 57 | bloodiest bloody 58 | bloodthirstier bloodthirsty 59 | bloodthirstiest bloodthirsty 60 | blowier blowy 61 | blowiest blowy 62 | blowsier blowsy 63 | blowsiest blowsy 64 | blowzier blowzy 65 | blowziest blowzy 66 | bluer blue 67 | bluest blue 68 | boner boner 69 | bonier bony 70 | boniest bony 71 
| bonnier bonny 72 | bonniest bonny 73 | boozier boozy 74 | booziest boozy 75 | boskier bosky 76 | boskiest bosky 77 | bossier bossy 78 | bossiest bossy 79 | botchier botchy 80 | botchiest botchy 81 | bother bother 82 | bouncier bouncy 83 | bounciest bouncy 84 | bounder bounder 85 | bower bower 86 | brainier brainy 87 | brainiest brainy 88 | brashier brashy 89 | brashiest brashy 90 | brassier brassy 91 | brassiest brassy 92 | brawnier brawny 93 | brawniest brawny 94 | breathier breathy 95 | breathiest breathy 96 | breezier breezy 97 | breeziest breezy 98 | brinier briny 99 | briniest briny 100 | britisher britisher 101 | broadcaster broadcaster 102 | brooder brooder 103 | broodier broody 104 | broodiest broody 105 | bubblier bubbly 106 | bubbliest bubbly 107 | buggier buggy 108 | buggiest buggy 109 | bulkier bulky 110 | bulkiest bulky 111 | bumpier bumpy 112 | bumpiest bumpy 113 | bunchier bunchy 114 | bunchiest bunchy 115 | burlier burly 116 | burliest burly 117 | burrier burry 118 | burriest burry 119 | burster burster 120 | bushier bushy 121 | bushiest bushy 122 | busier busy 123 | busiest busy 124 | buster buster 125 | bustier busty 126 | bustiest busty 127 | cagier cagey 128 | cagiest cagey 129 | camper camper 130 | cannier canny 131 | canniest canny 132 | canter canter 133 | cantier canty 134 | cantiest canty 135 | caster caster 136 | catchier catchy 137 | catchiest catchy 138 | cattier catty 139 | cattiest catty 140 | cer cer 141 | chancier chancy 142 | chanciest chancy 143 | charier chary 144 | chariest chary 145 | chattier chatty 146 | chattiest chatty 147 | cheekier cheeky 148 | cheekiest cheeky 149 | cheerier cheery 150 | cheeriest cheery 151 | cheesier cheesy 152 | cheesiest cheesy 153 | chestier chesty 154 | chestiest chesty 155 | chewier chewy 156 | chewiest chewy 157 | chillier chilly 158 | chilliest chilly 159 | chintzier chintzy 160 | chintziest chintzy 161 | chippier chippy 162 | chippiest chippy 163 | choosier choosy 164 | choosiest choosy 165 | 
choppier choppy 166 | choppiest choppy 167 | chubbier chubby 168 | chubbiest chubby 169 | chuffier chuffy 170 | chuffiest chuffy 171 | chummier chummy 172 | chummiest chummy 173 | chunkier chunky 174 | chunkiest chunky 175 | churchier churchy 176 | churchiest churchy 177 | clammier clammy 178 | clammiest clammy 179 | classier classy 180 | classiest classy 181 | cleanlier cleanly 182 | cleanliest cleanly 183 | clerklier clerkly 184 | clerkliest clerkly 185 | cloudier cloudy 186 | cloudiest cloudy 187 | clubbier clubby 188 | clubbiest clubby 189 | clumsier clumsy 190 | clumsiest clumsy 191 | cockier cocky 192 | cockiest cocky 193 | coder coder 194 | collier colly 195 | colliest colly 196 | comelier comely 197 | comeliest comely 198 | comfier comfy 199 | comfiest comfy 200 | cornier corny 201 | corniest corny 202 | cosier cosy 203 | cosiest cosy 204 | costlier costly 205 | costliest costly 206 | costumer costumer 207 | counterfeiter counterfeiter 208 | courtlier courtly 209 | courtliest courtly 210 | cozier cozy 211 | coziest cozy 212 | crabbier crabby 213 | crabbiest crabby 214 | cracker cracker 215 | craftier crafty 216 | craftiest crafty 217 | craggier craggy 218 | craggiest craggy 219 | crankier cranky 220 | crankiest cranky 221 | crasher crasher 222 | crawlier crawly 223 | crawliest crawly 224 | crazier crazy 225 | craziest crazy 226 | creamer creamer 227 | creamier creamy 228 | creamiest creamy 229 | creepier creepy 230 | creepiest creepy 231 | crispier crispy 232 | crispiest crispy 233 | crumbier crumby 234 | crumbiest crumby 235 | crumblier crumbly 236 | crumbliest crumbly 237 | crummier crummy 238 | crummiest crummy 239 | crustier crusty 240 | crustiest crusty 241 | curlier curly 242 | curliest curly 243 | customer customer 244 | cuter cute 245 | daffier daffy 246 | daffiest daffy 247 | daintier dainty 248 | daintiest dainty 249 | dandier dandy 250 | dandiest dandy 251 | deadlier deadly 252 | deadliest deadly 253 | dealer dealer 254 | deserter deserter 255 | 
dewier dewy 256 | dewiest dewy 257 | dicier dicey 258 | diciest dicey 259 | dimer dimer 260 | dimmer dim 261 | dimmest dim 262 | dingier dingy 263 | dingiest dingy 264 | dinkier dinky 265 | dinkiest dinky 266 | dippier dippy 267 | dippiest dippy 268 | dirtier dirty 269 | dirtiest dirty 270 | dishier dishy 271 | dishiest dishy 272 | dizzier dizzy 273 | dizziest dizzy 274 | dodgier dodgy 275 | dodgiest dodgy 276 | dopier dopey 277 | dopiest dopey 278 | dottier dotty 279 | dottiest dotty 280 | doughier doughy 281 | doughiest doughy 282 | doughtier doughty 283 | doughtiest doughty 284 | dowdier dowdy 285 | dowdiest dowdy 286 | dowier dowie dowy 287 | dowiest dowie dowy 288 | downer downer 289 | downier downy 290 | downiest downy 291 | dozier dozy 292 | doziest dozy 293 | drabber drab 294 | drabbest drab 295 | draftier drafty 296 | draftiest drafty 297 | draggier draggy 298 | draggiest draggy 299 | draughtier draughty 300 | draughtiest draughty 301 | dreamier dreamy 302 | dreamiest dreamy 303 | drearier dreary 304 | dreariest dreary 305 | dreggier dreggy 306 | dreggiest dreggy 307 | dresser dresser 308 | dressier dressy 309 | dressiest dressy 310 | drier dry 311 | driest dry 312 | drippier drippy 313 | drippiest drippy 314 | drowsier drowsy 315 | drowsiest drowsy 316 | dryer dry 317 | dryest dry 318 | dumpier dumpy 319 | dumpiest dumpy 320 | dunner dun 321 | dunnest dun 322 | duskier dusky 323 | duskiest dusky 324 | dustier dusty 325 | dustiest dusty 326 | earlier early 327 | earliest early 328 | earthier earthy 329 | earthiest earthy 330 | earthlier earthly 331 | earthliest earthly 332 | easier easy 333 | easiest easy 334 | easter easter 335 | eastsider eastsider 336 | edger edger 337 | edgier edgy 338 | edgiest edgy 339 | eerier eerie 340 | eeriest eerie 341 | emptier empty 342 | emptiest empty 343 | faker faker 344 | fancier fancy 345 | fanciest fancy 346 | fatter fat 347 | fattest fat 348 | fattier fatty 349 | fattiest fatty 350 | faultier faulty 351 | faultiest 
faulty 352 | feistier feisty 353 | feistiest feisty 354 | feller feller 355 | fiddlier fiddly 356 | fiddliest fiddly 357 | filmier filmy 358 | filmiest filmy 359 | filthier filthy 360 | filthiest filthy 361 | finnier finny 362 | finniest finny 363 | first-rater first-rater 364 | first-stringer first-stringer 365 | fishier fishy 366 | fishiest fishy 367 | fitter fit 368 | fittest fit 369 | flabbier flabby 370 | flabbiest flabby 371 | flaggier flaggy 372 | flaggiest flaggy 373 | flakier flaky 374 | flakiest flaky 375 | flasher flasher 376 | flashier flashy 377 | flashiest flashy 378 | flatter flat 379 | flattest flat 380 | flauntier flaunty 381 | flauntiest flaunty 382 | fledgier fledgy 383 | fledgiest fledgy 384 | fleecier fleecy 385 | fleeciest fleecy 386 | fleshier fleshy 387 | fleshiest fleshy 388 | fleshlier fleshly 389 | fleshliest fleshly 390 | flightier flighty 391 | flightiest flighty 392 | flimsier flimsy 393 | flimsiest flimsy 394 | flintier flinty 395 | flintiest flinty 396 | floatier floaty 397 | floatiest floaty 398 | floppier floppy 399 | floppiest floppy 400 | flossier flossy 401 | flossiest flossy 402 | fluffier fluffy 403 | fluffiest fluffy 404 | flukier fluky 405 | flukiest fluky 406 | foamier foamy 407 | foamiest foamy 408 | foggier foggy 409 | foggiest foggy 410 | folder folder 411 | folksier folksy 412 | folksiest folksy 413 | foolhardier foolhardy 414 | foolhardiest foolhardy 415 | fore-and-after fore-and-after 416 | foreigner foreigner 417 | forest forest 418 | founder founder 419 | foxier foxy 420 | foxiest foxy 421 | fratchier fratchy 422 | fratchiest fratchy 423 | freakier freaky 424 | freakiest freaky 425 | freer free 426 | freest free 427 | frenchier frenchy 428 | frenchiest frenchy 429 | friendlier friendly 430 | friendliest friendly 431 | friskier frisky 432 | friskiest frisky 433 | frizzier frizzy 434 | frizziest frizzy 435 | frizzlier frizzly 436 | frizzliest frizzly 437 | frostier frosty 438 | frostiest frosty 439 | frouzier frouzy 
440 | frouziest frouzy 441 | frowsier frowsy 442 | frowsiest frowsy 443 | frowzier frowzy 444 | frowziest frowzy 445 | fruitier fruity 446 | fruitiest fruity 447 | funkier funky 448 | funkiest funky 449 | funnier funny 450 | funniest funny 451 | furrier furry 452 | furriest furry 453 | fussier fussy 454 | fussiest fussy 455 | fustier fusty 456 | fustiest fusty 457 | fuzzier fuzzy 458 | fuzziest fuzzy 459 | gabbier gabby 460 | gabbiest gabby 461 | gamier gamy 462 | gamiest gamy 463 | gammier gammy 464 | gammiest gammy 465 | gassier gassy 466 | gassiest gassy 467 | gaudier gaudy 468 | gaudiest gaudy 469 | gauzier gauzy 470 | gauziest gauzy 471 | gawkier gawky 472 | gawkiest gawky 473 | ghastlier ghastly 474 | ghastliest ghastly 475 | ghostlier ghostly 476 | ghostliest ghostly 477 | giddier giddy 478 | giddiest giddy 479 | gladder glad 480 | gladdest glad 481 | glassier glassy 482 | glassiest glassy 483 | glibber glib 484 | glibbest glib 485 | gloomier gloomy 486 | gloomiest gloomy 487 | glossier glossy 488 | glossiest glossy 489 | glummer glum 490 | glummest glum 491 | godlier godly 492 | godliest godly 493 | goer goer 494 | goner goner 495 | goodlier goodly 496 | goodliest goodly 497 | goofier goofy 498 | goofiest goofy 499 | gooier gooey 500 | gooiest gooey 501 | goosier goosy 502 | goosiest goosy 503 | gorier gory 504 | goriest gory 505 | gradelier gradely 506 | gradeliest gradely 507 | grader grader 508 | grainier grainy 509 | grainiest grainy 510 | grassier grassy 511 | grassiest grassy 512 | greasier greasy 513 | greasiest greasy 514 | greedier greedy 515 | greediest greedy 516 | grimmer grim 517 | grimmest grim 518 | grislier grisly 519 | grisliest grisly 520 | grittier gritty 521 | grittiest gritty 522 | grizzlier grizzly 523 | grizzliest grizzly 524 | groggier groggy 525 | groggiest groggy 526 | groovier groovy 527 | grooviest groovy 528 | grottier grotty 529 | grottiest grotty 530 | grounder grounder 531 | grouper grouper 532 | groutier grouty 533 | 
groutiest grouty 534 | grubbier grubby 535 | grubbiest grubby 536 | grumpier grumpy 537 | grumpiest grumpy 538 | guest guest 539 | guiltier guilty 540 | guiltiest guilty 541 | gummier gummy 542 | gummiest gummy 543 | gushier gushy 544 | gushiest gushy 545 | gustier gusty 546 | gustiest gusty 547 | gutsier gutsy 548 | gutsiest gutsy 549 | hairier hairy 550 | hairiest hairy 551 | halfways halfway 552 | halter halter 553 | hammier hammy 554 | hammiest hammy 555 | handier handy 556 | handiest handy 557 | happier happy 558 | happiest happy 559 | hardier hardy 560 | hardiest hardy 561 | hastier hasty 562 | hastiest hasty 563 | haughtier haughty 564 | haughtiest haughty 565 | hazier hazy 566 | haziest hazy 567 | header header 568 | headier heady 569 | headiest heady 570 | healthier healthy 571 | healthiest healthy 572 | heartier hearty 573 | heartiest hearty 574 | heavier heavy 575 | heaviest heavy 576 | heftier hefty 577 | heftiest hefty 578 | hepper hep 579 | heppest hep 580 | herbier herby 581 | herbiest herby 582 | hinder hind 583 | hipper hip 584 | hippest hip 585 | hippier hippy 586 | hippiest hippy 587 | hoarier hoary 588 | hoariest hoary 589 | holier holy 590 | holiest holy 591 | homelier homely 592 | homeliest homely 593 | homer homer 594 | homier homey 595 | homiest homey 596 | hornier horny 597 | horniest horny 598 | horsier horsy 599 | horsiest horsy 600 | hotter hot 601 | hottest hot 602 | humpier humpy 603 | humpiest humpy 604 | hunger hunger 605 | hungrier hungry 606 | hungriest hungry 607 | huskier husky 608 | huskiest husky 609 | icier icy 610 | iciest icy 611 | inkier inky 612 | inkiest inky 613 | insider insider 614 | interest interest 615 | jaggier jaggy 616 | jaggiest jaggy 617 | jammier jammy 618 | jammiest jammy 619 | jauntier jaunty 620 | jauntiest jaunty 621 | jazzier jazzy 622 | jazziest jazzy 623 | jerkier jerky 624 | jerkiest jerky 625 | jointer jointer 626 | jollier jolly 627 | jolliest jolly 628 | juicier juicy 629 | juiciest juicy 630 | 
jumpier jumpy 631 | jumpiest jumpy 632 | kindlier kindly 633 | kindliest kindly 634 | kinkier kinky 635 | kinkiest kinky 636 | knottier knotty 637 | knottiest knotty 638 | knurlier knurly 639 | knurliest knurly 640 | kookier kooky 641 | kookiest kooky 642 | lacier lacy 643 | laciest lacy 644 | lairier lairy 645 | lairiest lairy 646 | lakier laky 647 | lakiest laky 648 | lander lander 649 | lankier lanky 650 | lankiest lanky 651 | lathier lathy 652 | lathiest lathy 653 | layer layer 654 | lazier lazy 655 | laziest lazy 656 | leafier leafy 657 | leafiest leafy 658 | leakier leaky 659 | leakiest leaky 660 | learier leary 661 | leariest leary 662 | leer leer 663 | leerier leery 664 | leeriest leery 665 | left-hander left-hander 666 | left-winger left-winger 667 | leggier leggy 668 | leggiest leggy 669 | lengthier lengthy 670 | lengthiest lengthy 671 | ler ler 672 | leveler leveler 673 | limier limy 674 | limiest limy 675 | lippier lippy 676 | lippiest lippy 677 | liter liter 678 | livelier lively 679 | liveliest lively 680 | liver liver 681 | loather loather 682 | loftier lofty 683 | loftiest lofty 684 | logier logy 685 | logiest logy 686 | lonelier lonely 687 | loneliest lonely 688 | loner loner 689 | loonier loony 690 | looniest loony 691 | loopier loopy 692 | loopiest loopy 693 | lordlier lordly 694 | lordliest lordly 695 | lousier lousy 696 | lousiest lousy 697 | lovelier lovely 698 | loveliest lovely 699 | lowlander lowlander 700 | lowlier lowly 701 | lowliest lowly 702 | luckier lucky 703 | luckiest lucky 704 | lumpier lumpy 705 | lumpiest lumpy 706 | lunier luny 707 | luniest luny 708 | lustier lusty 709 | lustiest lusty 710 | madder mad 711 | maddest mad 712 | mainer mainer 713 | maligner maligner 714 | maltier malty 715 | maltiest malty 716 | mangier mangy 717 | mangiest mangy 718 | mankier manky 719 | mankiest manky 720 | manlier manly 721 | manliest manly 722 | mariner mariner 723 | marshier marshy 724 | marshiest marshy 725 | massier massy 726 | massiest 
massy 727 | matter matter 728 | maungier maungy 729 | maungiest maungy 730 | mazier mazy 731 | maziest mazy 732 | mealier mealy 733 | mealiest mealy 734 | measlier measly 735 | measliest measly 736 | meatier meaty 737 | meatiest meaty 738 | meeter meeter 739 | merrier merry 740 | merriest merry 741 | messier messy 742 | messiest messy 743 | miffier miffy 744 | miffiest miffy 745 | mightier mighty 746 | mightiest mighty 747 | milcher milcher 748 | milker milker 749 | milkier milky 750 | milkiest milky 751 | mingier mingy 752 | mingiest mingy 753 | minter minter 754 | mirkier mirky 755 | mirkiest mirky 756 | miser miser 757 | mistier misty 758 | mistiest misty 759 | mocker mocker 760 | modeler modeler 761 | modest modest 762 | moldier moldy 763 | moldiest moldy 764 | moodier moody 765 | moodiest moody 766 | moonier moony 767 | mooniest moony 768 | mothier mothy 769 | mothiest mothy 770 | mouldier mouldy 771 | mouldiest mouldy 772 | mousier mousy 773 | mousiest mousy 774 | mouthier mouthy 775 | mouthiest mouthy 776 | muckier mucky 777 | muckiest mucky 778 | muddier muddy 779 | muddiest muddy 780 | muggier muggy 781 | muggiest muggy 782 | multiplexer multiplexer 783 | murkier murky 784 | murkiest murky 785 | mushier mushy 786 | mushiest mushy 787 | muskier musky 788 | muskiest musky 789 | muster muster 790 | mustier musty 791 | mustiest musty 792 | muzzier muzzy 793 | muzziest muzzy 794 | nappier nappy 795 | nappiest nappy 796 | nastier nasty 797 | nastiest nasty 798 | nattier natty 799 | nattiest natty 800 | naughtier naughty 801 | naughtiest naughty 802 | needier needy 803 | neediest needy 804 | nervier nervy 805 | nerviest nervy 806 | newsier newsy 807 | newsiest newsy 808 | niftier nifty 809 | niftiest nifty 810 | nippier nippy 811 | nippiest nippy 812 | nittier nitty 813 | nittiest nitty 814 | noisier noisy 815 | noisiest noisy 816 | northeasterner northeasterner 817 | norther norther 818 | northerner northerner 819 | nosier nosy 820 | nosiest nosy 821 | number 
number 822 | nuttier nutty 823 | nuttiest nutty 824 | offer off 825 | offer offer 826 | oilier oily 827 | oiliest oily 828 | old-timer old-timer 829 | oliver oliver 830 | oozier oozy 831 | ooziest oozy 832 | opener opener 833 | outsider outsider 834 | overcomer overcomer 835 | overnighter overnighter 836 | owner owner 837 | pallier pally 838 | palliest pally 839 | palmier palmy 840 | palmiest palmy 841 | paltrier paltry 842 | paltriest paltry 843 | pappier pappy 844 | pappiest pappy 845 | parkier parky 846 | parkiest parky 847 | part-timer part-timer 848 | passer passer 849 | paster paster 850 | pastier pasty 851 | pastiest pasty 852 | patchier patchy 853 | patchiest patchy 854 | pater pater 855 | pawkier pawky 856 | pawkiest pawky 857 | peachier peachy 858 | peachiest peachy 859 | pearler pearler 860 | pearlier pearly 861 | pearliest pearly 862 | pedaler pedaler 863 | peppier peppy 864 | peppiest peppy 865 | perkier perky 866 | perkiest perky 867 | peskier pesky 868 | peskiest pesky 869 | peter peter 870 | pettier petty 871 | pettiest petty 872 | phonier phony 873 | phoniest phony 874 | pickier picky 875 | pickiest picky 876 | piggier piggy 877 | piggiest piggy 878 | pinier piny 879 | piniest piny 880 | pitchier pitchy 881 | pitchiest pitchy 882 | pithier pithy 883 | pithiest pithy 884 | planer planer 885 | plashier plashy 886 | plashiest plashy 887 | platier platy 888 | platiest platy 889 | player player 890 | pluckier plucky 891 | pluckiest plucky 892 | plumber plumber 893 | plumier plumy 894 | plumiest plumy 895 | plummier plummy 896 | plummiest plummy 897 | podgier podgy 898 | podgiest podgy 899 | pokier poky 900 | pokiest poky 901 | polisher polisher 902 | porkier porky 903 | porkiest porky 904 | porter porter 905 | portlier portly 906 | portliest portly 907 | poster poster 908 | pottier potty 909 | pottiest potty 910 | preachier preachy 911 | preachiest preachy 912 | presenter presenter 913 | pretender pretender 914 | prettier pretty 915 | prettiest pretty 
916 | pricier pricy 917 | priciest pricy 918 | pricklier prickly 919 | prickliest prickly 920 | priestlier priestly 921 | priestliest priestly 922 | primer primer 923 | primmer prim 924 | primmest prim 925 | princelier princely 926 | princeliest princely 927 | printer printer 928 | prissier prissy 929 | prissiest prissy 930 | privateer privateer 931 | privier privy 932 | priviest privy 933 | prompter prompter 934 | prosier prosy 935 | prosiest prosy 936 | pudgier pudgy 937 | pudgiest pudgy 938 | puffer puffer 939 | puffier puffy 940 | puffiest puffy 941 | pulpier pulpy 942 | pulpiest pulpy 943 | punchier punchy 944 | punchiest punchy 945 | punier puny 946 | puniest puny 947 | pushier pushy 948 | pushiest pushy 949 | pussier pussy 950 | pussiest pussy 951 | quaggier quaggy 952 | quaggiest quaggy 953 | quakier quaky 954 | quakiest quaky 955 | queasier queasy 956 | queasiest queasy 957 | queenlier queenly 958 | queenliest queenly 959 | racier racy 960 | raciest racy 961 | rainier rainy 962 | rainiest rainy 963 | randier randy 964 | randiest randy 965 | rangier rangy 966 | rangiest rangy 967 | ranker ranker 968 | rattier ratty 969 | rattiest ratty 970 | rattlier rattly 971 | rattliest rattly 972 | raunchier raunchy 973 | raunchiest raunchy 974 | readier ready 975 | readiest ready 976 | recorder recorder 977 | redder red 978 | reddest red 979 | reedier reedy 980 | reediest reedy 981 | renter renter 982 | retailer retailer 983 | right-hander right-hander 984 | right-winger right-winger 985 | rimier rimy 986 | rimiest rimy 987 | riskier risky 988 | riskiest risky 989 | ritzier ritzy 990 | ritziest ritzy 991 | roaster roaster 992 | rockier rocky 993 | rockiest rocky 994 | roilier roily 995 | roiliest roily 996 | rookier rooky 997 | rookiest rooky 998 | roomier roomy 999 | roomiest roomy 1000 | ropier ropy 1001 | ropiest ropy 1002 | rosier rosy 1003 | rosiest rosy 1004 | rowdier rowdy 1005 | rowdiest rowdy 1006 | ruddier ruddy 1007 | ruddiest ruddy 1008 | runnier runny 1009 
| runniest runny 1010 | rusher rusher 1011 | rushier rushy 1012 | rushiest rushy 1013 | rustier rusty 1014 | rustiest rusty 1015 | ruttier rutty 1016 | ruttiest rutty 1017 | sadder sad 1018 | saddest sad 1019 | salter salter 1020 | saltier salty 1021 | saltiest salty 1022 | sampler sampler 1023 | sandier sandy 1024 | sandiest sandy 1025 | sappier sappy 1026 | sappiest sappy 1027 | sassier sassy 1028 | sassiest sassy 1029 | saucier saucy 1030 | sauciest saucy 1031 | savvier savvy 1032 | savviest savvy 1033 | scabbier scabby 1034 | scabbiest scabby 1035 | scalier scaly 1036 | scaliest scaly 1037 | scantier scanty 1038 | scantiest scanty 1039 | scarier scary 1040 | scariest scary 1041 | scraggier scraggy 1042 | scraggiest scraggy 1043 | scragglier scraggly 1044 | scraggliest scraggly 1045 | scraper scraper 1046 | scrappier scrappy 1047 | scrappiest scrappy 1048 | scrawnier scrawny 1049 | scrawniest scrawny 1050 | screwier screwy 1051 | screwiest screwy 1052 | scrubbier scrubby 1053 | scrubbiest scrubby 1054 | scruffier scruffy 1055 | scruffiest scruffy 1056 | scungier scungy 1057 | scungiest scungy 1058 | scurvier scurvy 1059 | scurviest scurvy 1060 | seamier seamy 1061 | seamiest seamy 1062 | second-rater second-rater 1063 | seconder seconder 1064 | seedier seedy 1065 | seediest seedy 1066 | seemlier seemly 1067 | seemliest seemly 1068 | serer serer 1069 | sexier sexy 1070 | sexiest sexy 1071 | shabbier shabby 1072 | shabbiest shabby 1073 | shadier shady 1074 | shadiest shady 1075 | shaggier shaggy 1076 | shaggiest shaggy 1077 | shakier shaky 1078 | shakiest shaky 1079 | shapelier shapely 1080 | shapeliest shapely 1081 | shier shy 1082 | shiest shy 1083 | shiftier shifty 1084 | shiftiest shifty 1085 | shinier shiny 1086 | shiniest shiny 1087 | shirtier shirty 1088 | shirtiest shirty 1089 | shoddier shoddy 1090 | shoddiest shoddy 1091 | showier showy 1092 | showiest showy 1093 | shrubbier shrubby 1094 | shrubbiest shrubby 1095 | shyer shy 1096 | shyest shy 1097 | 
sicklier sickly 1098 | sickliest sickly 1099 | sightlier sightly 1100 | sightliest sightly 1101 | signaler signaler 1102 | signer signer 1103 | silkier silky 1104 | silkiest silky 1105 | sillier silly 1106 | silliest silly 1107 | sketchier sketchy 1108 | sketchiest sketchy 1109 | skewer skewer 1110 | skimpier skimpy 1111 | skimpiest skimpy 1112 | skinnier skinny 1113 | skinniest skinny 1114 | slaphappier slaphappy 1115 | slaphappiest slaphappy 1116 | slatier slaty 1117 | slatiest slaty 1118 | slaver slaver 1119 | sleazier sleazy 1120 | sleaziest sleazy 1121 | sleepier sleepy 1122 | sleepiest sleepy 1123 | slier sly 1124 | sliest sly 1125 | slimier slimy 1126 | slimiest slimy 1127 | slimmer slim 1128 | slimmest slim 1129 | slimsier slimsy 1130 | slimsiest slimsy 1131 | slinkier slinky 1132 | slinkiest slinky 1133 | slippier slippy 1134 | slippiest slippy 1135 | sloppier sloppy 1136 | sloppiest sloppy 1137 | slyer sly 1138 | slyest sly 1139 | smarmier smarmy 1140 | smarmiest smarmy 1141 | smellier smelly 1142 | smelliest smelly 1143 | smokier smoky 1144 | smokiest smoky 1145 | smugger smug 1146 | smuggest smug 1147 | snakier snaky 1148 | snakiest snaky 1149 | snappier snappy 1150 | snappiest snappy 1151 | snatchier snatchy 1152 | snatchiest snatchy 1153 | snazzier snazzy 1154 | snazziest snazzy 1155 | sneaker sneaker 1156 | sniffier sniffy 1157 | sniffiest sniffy 1158 | snootier snooty 1159 | snootiest snooty 1160 | snottier snotty 1161 | snottiest snotty 1162 | snowier snowy 1163 | snowiest snowy 1164 | snuffer snuffer 1165 | snuffier snuffy 1166 | snuffiest snuffy 1167 | snugger snug 1168 | snuggest snug 1169 | soapier soapy 1170 | soapiest soapy 1171 | soggier soggy 1172 | soggiest soggy 1173 | solder solder 1174 | sonsier sonsy 1175 | sonsiest sonsy 1176 | sootier sooty 1177 | sootiest sooty 1178 | soppier soppy 1179 | soppiest soppy 1180 | sorrier sorry 1181 | sorriest sorry 1182 | soupier soupy 1183 | soupiest soupy 1184 | souther souther 1185 | southerner 
southerner 1186 | speedier speedy 1187 | speediest speedy 1188 | spicier spicy 1189 | spiciest spicy 1190 | spiffier spiffy 1191 | spiffiest spiffy 1192 | spikier spiky 1193 | spikiest spiky 1194 | spindlier spindly 1195 | spindliest spindly 1196 | spinier spiny 1197 | spiniest spiny 1198 | splashier splashy 1199 | splashiest splashy 1200 | spongier spongy 1201 | spongiest spongy 1202 | spookier spooky 1203 | spookiest spooky 1204 | spoonier spoony 1205 | spooniest spoony 1206 | sportier sporty 1207 | sportiest sporty 1208 | spottier spotty 1209 | spottiest spotty 1210 | spreader spreader 1211 | sprier spry 1212 | spriest spry 1213 | sprightlier sprightly 1214 | sprightliest sprightly 1215 | springer springer 1216 | springier springy 1217 | springiest springy 1218 | squashier squashy 1219 | squashiest squashy 1220 | squatter squat 1221 | squattest squat 1222 | squattier squatty 1223 | squattiest squatty 1224 | squiffier squiffy 1225 | squiffiest squiffy 1226 | stagier stagy 1227 | stagiest stagy 1228 | stalkier stalky 1229 | stalkiest stalky 1230 | stapler stapler 1231 | starchier starchy 1232 | starchiest starchy 1233 | starer starer 1234 | starest starest 1235 | starrier starry 1236 | starriest starry 1237 | statelier stately 1238 | stateliest stately 1239 | steadier steady 1240 | steadiest steady 1241 | stealthier stealthy 1242 | stealthiest stealthy 1243 | steamier steamy 1244 | steamiest steamy 1245 | stingier stingy 1246 | stingiest stingy 1247 | stiper striper 1248 | stocker stocker 1249 | stockier stocky 1250 | stockiest stocky 1251 | stodgier stodgy 1252 | stodgiest stodgy 1253 | stonier stony 1254 | stoniest stony 1255 | stormier stormy 1256 | stormiest stormy 1257 | streakier streaky 1258 | streakiest streaky 1259 | streamier streamy 1260 | streamiest streamy 1261 | stretcher stretcher 1262 | stretchier stretchy 1263 | stretchiest stretchy 1264 | stringier stringy 1265 | stringiest stringy 1266 | stripier stripy 1267 | stripiest stripy 1268 | stronger 
strong 1269 | strongest strong 1270 | stroppier stroppy 1271 | stroppiest stroppy 1272 | stuffier stuffy 1273 | stuffiest stuffy 1274 | stumpier stumpy 1275 | stumpiest stumpy 1276 | sturdier sturdy 1277 | sturdiest sturdy 1278 | submariner submariner 1279 | sulkier sulky 1280 | sulkiest sulky 1281 | sultrier sultry 1282 | sultriest sultry 1283 | sunnier sunny 1284 | sunniest sunny 1285 | surlier surly 1286 | surliest surly 1287 | swagger swagger 1288 | swankier swanky 1289 | swankiest swanky 1290 | swarthier swarthy 1291 | swarthiest swarthy 1292 | sweatier sweaty 1293 | sweatiest sweaty 1294 | tackier tacky 1295 | tackiest tacky 1296 | talkier talky 1297 | talkiest talky 1298 | tangier tangy 1299 | tangiest tangy 1300 | tanner tan 1301 | tannest tan 1302 | tardier tardy 1303 | tardiest tardy 1304 | tastier tasty 1305 | tastiest tasty 1306 | tattier tatty 1307 | tattiest tatty 1308 | tawdrier tawdry 1309 | tawdriest tawdry 1310 | techier techy 1311 | techiest techy 1312 | teenager teenager 1313 | teenier teeny 1314 | teeniest teeny 1315 | teetotaler teetotaler 1316 | tester tester 1317 | testier testy 1318 | testiest testy 1319 | tetchier tetchy 1320 | tetchiest tetchy 1321 | thinner thin 1322 | thinnest thin 1323 | third-rater third-rater 1324 | thirstier thirsty 1325 | thirstiest thirsty 1326 | thornier thorny 1327 | thorniest thorny 1328 | threadier thready 1329 | threadiest thready 1330 | thriftier thrifty 1331 | thriftiest thrifty 1332 | throatier throaty 1333 | throatiest throaty 1334 | tidier tidy 1335 | tidiest tidy 1336 | timelier timely 1337 | timeliest timely 1338 | tinier tiny 1339 | tiniest tiny 1340 | tinnier tinny 1341 | tinniest tinny 1342 | tipsier tipsy 1343 | tipsiest tipsy 1344 | tonier tony 1345 | toniest tony 1346 | toothier toothy 1347 | toothiest toothy 1348 | toper toper 1349 | touchier touchy 1350 | touchiest touchy 1351 | trader trader 1352 | trashier trashy 1353 | trashiest trashy 1354 | trendier trendy 1355 | trendiest trendy 1356 | 
trickier tricky 1357 | trickiest tricky 1358 | tricksier tricksy 1359 | tricksiest tricksy 1360 | trimer trimer 1361 | trimmer trim 1362 | trimmest trim 1363 | truer true 1364 | truest true 1365 | trustier trusty 1366 | trustiest trusty 1367 | tubbier tubby 1368 | tubbiest tubby 1369 | turfier turfy 1370 | turfiest turfy 1371 | tweedier tweedy 1372 | tweediest tweedy 1373 | twiggier twiggy 1374 | twiggiest twiggy 1375 | uglier ugly 1376 | ugliest ugly 1377 | unfriendlier unfriendly 1378 | unfriendliest unfriendly 1379 | ungainlier ungainly 1380 | ungainliest ungainly 1381 | ungodlier ungodly 1382 | ungodliest ungodly 1383 | unhappier unhappy 1384 | unhappiest unhappy 1385 | unhealthier unhealthy 1386 | unhealthiest unhealthy 1387 | unholier unholy 1388 | unholiest unholy 1389 | unrulier unruly 1390 | unruliest unruly 1391 | untidier untidy 1392 | untidiest untidy 1393 | vastier vasty 1394 | vastiest vasty 1395 | vest vest 1396 | viewier viewy 1397 | viewiest viewy 1398 | wackier wacky 1399 | wackiest wacky 1400 | wanner wan 1401 | wannest wan 1402 | warier wary 1403 | wariest wary 1404 | washier washy 1405 | washiest washy 1406 | waster waster 1407 | wavier wavy 1408 | waviest wavy 1409 | waxier waxy 1410 | waxiest waxy 1411 | weaklier weakly 1412 | weakliest weakly 1413 | wealthier wealthy 1414 | wealthiest wealthy 1415 | wearier weary 1416 | weariest weary 1417 | webbier webby 1418 | webbiest webby 1419 | weedier weedy 1420 | weediest weedy 1421 | weenier weeny 1422 | weeniest weeny 1423 | weensier weensy 1424 | weensiest weensy 1425 | weepier weepy 1426 | weepiest weepy 1427 | weightier weighty 1428 | weightiest weighty 1429 | welsher welsher 1430 | wetter wet 1431 | wettest wet 1432 | whackier whacky 1433 | whackiest whacky 1434 | whimsier whimsy 1435 | whimsiest whimsy 1436 | wholesaler wholesaler 1437 | wieldier wieldy 1438 | wieldiest wieldy 1439 | wilier wily 1440 | wiliest wily 1441 | windier windy 1442 | windiest windy 1443 | winier winy 1444 | winiest 
winy 1445 | winterier wintery 1446 | winteriest wintery 1447 | wintrier wintry 1448 | wintriest wintry 1449 | wirier wiry 1450 | wiriest wiry 1451 | wispier wispy 1452 | wispiest wispy 1453 | wittier witty 1454 | wittiest witty 1455 | wonkier wonky 1456 | wonkiest wonky 1457 | woodier woody 1458 | woodiest woody 1459 | woodsier woodsy 1460 | woodsiest woodsy 1461 | woollier woolly 1462 | woolliest woolly 1463 | woozier woozy 1464 | wooziest woozy 1465 | wordier wordy 1466 | wordiest wordy 1467 | worldlier worldly 1468 | worldliest worldly 1469 | wormier wormy 1470 | wormiest wormy 1471 | worse bad 1472 | worst bad 1473 | worthier worthy 1474 | worthiest worthy 1475 | wrier wry 1476 | wriest wry 1477 | wryer wry 1478 | wryest wry 1479 | yarer yare 1480 | yarest yare 1481 | yeastier yeasty 1482 | yeastiest yeasty 1483 | younger young 1484 | youngest young 1485 | yummier yummy 1486 | yummiest yummy 1487 | zanier zany 1488 | zaniest zany 1489 | zippier zippy 1490 | zippiest zippy 1491 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/data/WordNet-2.0-Exceptions/adv.exc: -------------------------------------------------------------------------------- 1 | best well 2 | better well 3 | deeper deeply 4 | farther far 5 | further far 6 | harder hard 7 | hardest hard 8 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/data/WordNet-2.0-Exceptions/buildExeptionDB.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use DB_File; 3 | @ARGV!=3&&die "Usage: buildExceptionDB.pl WordNet-exception-file-directory exception-file-extension output-file\n"; 4 | opendir(DIR,$ARGV[0])||die "Cannot open directory $ARGV[0]\n"; 5 | tie %exceptiondb,'DB_File',"$ARGV[2]",O_CREAT|O_RDWR,0640,$DB_HASH or 6 | die "Cannot open exception db file for output: $ARGV[2]\n"; 7 | while(defined($file=readdir(DIR))) { 8 | if($file=~/\.$ARGV[1]$/o) { 9 | print 
$file,"\n"; 10 | open(IN,"$file")||die "Cannot open exception file: $file\n"; 11 | while(defined($line=)) { 12 | chomp($line); 13 | @tmp=split(/\s+/,$line); 14 | $exceptiondb{$tmp[0]}=$tmp[1]; 15 | print $tmp[0],"\n"; 16 | } 17 | close(IN); 18 | } 19 | } 20 | untie %exceptiondb; 21 | 22 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/data/WordNet-2.0.exc.db: -------------------------------------------------------------------------------- 1 | WordNet-2.0-Exceptions/WordNet-2.0.exc.db -------------------------------------------------------------------------------- /ROUGE-1.5.5/data/smart_common_words.txt: -------------------------------------------------------------------------------- 1 | reuters 2 | ap 3 | jan 4 | feb 5 | mar 6 | apr 7 | may 8 | jun 9 | jul 10 | aug 11 | sep 12 | oct 13 | nov 14 | dec 15 | tech 16 | news 17 | index 18 | mon 19 | tue 20 | wed 21 | thu 22 | fri 23 | sat 24 | 's 25 | a 26 | a's 27 | able 28 | about 29 | above 30 | according 31 | accordingly 32 | across 33 | actually 34 | after 35 | afterwards 36 | again 37 | against 38 | ain't 39 | all 40 | allow 41 | allows 42 | almost 43 | alone 44 | along 45 | already 46 | also 47 | although 48 | always 49 | am 50 | amid 51 | among 52 | amongst 53 | an 54 | and 55 | another 56 | any 57 | anybody 58 | anyhow 59 | anyone 60 | anything 61 | anyway 62 | anyways 63 | anywhere 64 | apart 65 | appear 66 | appreciate 67 | appropriate 68 | are 69 | aren't 70 | around 71 | as 72 | aside 73 | ask 74 | asking 75 | associated 76 | at 77 | available 78 | away 79 | awfully 80 | b 81 | be 82 | became 83 | because 84 | become 85 | becomes 86 | becoming 87 | been 88 | before 89 | beforehand 90 | behind 91 | being 92 | believe 93 | below 94 | beside 95 | besides 96 | best 97 | better 98 | between 99 | beyond 100 | both 101 | brief 102 | but 103 | by 104 | c 105 | c'mon 106 | c's 107 | came 108 | can 109 | can't 110 | cannot 111 | cant 112 | cause 113 | causes 114 | 
certain 115 | certainly 116 | changes 117 | clearly 118 | co 119 | com 120 | come 121 | comes 122 | concerning 123 | consequently 124 | consider 125 | considering 126 | contain 127 | containing 128 | contains 129 | corresponding 130 | could 131 | couldn't 132 | course 133 | currently 134 | d 135 | definitely 136 | described 137 | despite 138 | did 139 | didn't 140 | different 141 | do 142 | does 143 | doesn't 144 | doing 145 | don't 146 | done 147 | down 148 | downwards 149 | during 150 | e 151 | each 152 | edu 153 | eg 154 | e.g. 155 | eight 156 | either 157 | else 158 | elsewhere 159 | enough 160 | entirely 161 | especially 162 | et 163 | etc 164 | etc. 165 | even 166 | ever 167 | every 168 | everybody 169 | everyone 170 | everything 171 | everywhere 172 | ex 173 | exactly 174 | example 175 | except 176 | f 177 | far 178 | few 179 | fifth 180 | five 181 | followed 182 | following 183 | follows 184 | for 185 | former 186 | formerly 187 | forth 188 | four 189 | from 190 | further 191 | furthermore 192 | g 193 | get 194 | gets 195 | getting 196 | given 197 | gives 198 | go 199 | goes 200 | going 201 | gone 202 | got 203 | gotten 204 | greetings 205 | h 206 | had 207 | hadn't 208 | happens 209 | hardly 210 | has 211 | hasn't 212 | have 213 | haven't 214 | having 215 | he 216 | he's 217 | hello 218 | help 219 | hence 220 | her 221 | here 222 | here's 223 | hereafter 224 | hereby 225 | herein 226 | hereupon 227 | hers 228 | herself 229 | hi 230 | him 231 | himself 232 | his 233 | hither 234 | hopefully 235 | how 236 | howbeit 237 | however 238 | i 239 | i'd 240 | i'll 241 | i'm 242 | i've 243 | ie 244 | i.e. 
245 | if 246 | ignored 247 | immediate 248 | in 249 | inasmuch 250 | inc 251 | indeed 252 | indicate 253 | indicated 254 | indicates 255 | inner 256 | insofar 257 | instead 258 | into 259 | inward 260 | is 261 | isn't 262 | it 263 | it'd 264 | it'll 265 | it's 266 | its 267 | itself 268 | j 269 | just 270 | k 271 | keep 272 | keeps 273 | kept 274 | know 275 | knows 276 | known 277 | l 278 | lately 279 | later 280 | latter 281 | latterly 282 | least 283 | less 284 | lest 285 | let 286 | let's 287 | like 288 | liked 289 | likely 290 | little 291 | look 292 | looking 293 | looks 294 | ltd 295 | m 296 | mainly 297 | many 298 | may 299 | maybe 300 | me 301 | mean 302 | meanwhile 303 | merely 304 | might 305 | more 306 | moreover 307 | most 308 | mostly 309 | mr. 310 | ms. 311 | much 312 | must 313 | my 314 | myself 315 | n 316 | namely 317 | nd 318 | near 319 | nearly 320 | necessary 321 | need 322 | needs 323 | neither 324 | never 325 | nevertheless 326 | new 327 | next 328 | nine 329 | no 330 | nobody 331 | non 332 | none 333 | noone 334 | nor 335 | normally 336 | not 337 | nothing 338 | novel 339 | now 340 | nowhere 341 | o 342 | obviously 343 | of 344 | off 345 | often 346 | oh 347 | ok 348 | okay 349 | old 350 | on 351 | once 352 | one 353 | ones 354 | only 355 | onto 356 | or 357 | other 358 | others 359 | otherwise 360 | ought 361 | our 362 | ours 363 | ourselves 364 | out 365 | outside 366 | over 367 | overall 368 | own 369 | p 370 | particular 371 | particularly 372 | per 373 | perhaps 374 | placed 375 | please 376 | plus 377 | possible 378 | presumably 379 | probably 380 | provides 381 | q 382 | que 383 | quite 384 | qv 385 | r 386 | rather 387 | rd 388 | re 389 | really 390 | reasonably 391 | regarding 392 | regardless 393 | regards 394 | relatively 395 | respectively 396 | right 397 | s 398 | said 399 | same 400 | saw 401 | say 402 | saying 403 | says 404 | second 405 | secondly 406 | see 407 | seeing 408 | seem 409 | seemed 410 | seeming 411 | seems 412 | 
seen 413 | self 414 | selves 415 | sensible 416 | sent 417 | serious 418 | seriously 419 | seven 420 | several 421 | shall 422 | she 423 | should 424 | shouldn't 425 | since 426 | six 427 | so 428 | some 429 | somebody 430 | somehow 431 | someone 432 | something 433 | sometime 434 | sometimes 435 | somewhat 436 | somewhere 437 | soon 438 | sorry 439 | specified 440 | specify 441 | specifying 442 | still 443 | sub 444 | such 445 | sup 446 | sure 447 | t 448 | t's 449 | take 450 | taken 451 | tell 452 | tends 453 | th 454 | than 455 | thank 456 | thanks 457 | thanx 458 | that 459 | that's 460 | thats 461 | the 462 | their 463 | theirs 464 | them 465 | themselves 466 | then 467 | thence 468 | there 469 | there's 470 | thereafter 471 | thereby 472 | therefore 473 | therein 474 | theres 475 | thereupon 476 | these 477 | they 478 | they'd 479 | they'll 480 | they're 481 | they've 482 | think 483 | third 484 | this 485 | thorough 486 | thoroughly 487 | those 488 | though 489 | three 490 | through 491 | throughout 492 | thru 493 | thus 494 | to 495 | together 496 | too 497 | took 498 | toward 499 | towards 500 | tried 501 | tries 502 | truly 503 | try 504 | trying 505 | twice 506 | two 507 | u 508 | un 509 | under 510 | unfortunately 511 | unless 512 | unlikely 513 | until 514 | unto 515 | up 516 | upon 517 | us 518 | use 519 | used 520 | useful 521 | uses 522 | using 523 | usually 524 | uucp 525 | v 526 | value 527 | various 528 | very 529 | via 530 | viz 531 | vs 532 | w 533 | want 534 | wants 535 | was 536 | wasn't 537 | way 538 | we 539 | we'd 540 | we'll 541 | we're 542 | we've 543 | welcome 544 | well 545 | went 546 | were 547 | weren't 548 | what 549 | what's 550 | whatever 551 | when 552 | whence 553 | whenever 554 | where 555 | where's 556 | whereafter 557 | whereas 558 | whereby 559 | wherein 560 | whereupon 561 | wherever 562 | whether 563 | which 564 | while 565 | whither 566 | who 567 | who's 568 | whoever 569 | whole 570 | whom 571 | whose 572 | why 573 | 
will 574 | willing 575 | wish 576 | with 577 | within 578 | without 579 | won't 580 | wonder 581 | would 582 | would 583 | wouldn't 584 | x 585 | y 586 | yes 587 | yet 588 | you 589 | you'd 590 | you'll 591 | you're 592 | you've 593 | your 594 | yours 595 | yourself 596 | yourselves 597 | z 598 | zero 599 | -------------------------------------------------------------------------------- /ROUGE-1.5.5/runROUGE-test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | use Cwd; 3 | $curdir=getcwd; 4 | $ROUGE="../ROUGE-1.5.5.pl"; 5 | chdir("sample-test"); 6 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -a ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-a.out"; 7 | print $cmd,"\n"; 8 | system($cmd); 9 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -a -m ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-a-m.out"; 10 | print $cmd,"\n"; 11 | system($cmd); 12 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -a -m -s ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-a-m-s.out"; 13 | print $cmd,"\n"; 14 | system($cmd); 15 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -l 10 -a ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-l10-a.out"; 16 | print $cmd,"\n"; 17 | system($cmd); 18 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -l 10 -a -m ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-l10-a-m.out"; 19 | print $cmd,"\n"; 20 | system($cmd); 21 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -l 10 -a -m -s ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-l10-a-m-s.out"; 22 | print $cmd,"\n"; 23 | system($cmd); 24 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -b 75 -a ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-b75-a.out"; 25 | print $cmd,"\n"; 26 | system($cmd); 27 | $cmd="$ROUGE -e 
../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -b 75 -a -m ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-b75-a-m.out"; 28 | print $cmd,"\n"; 29 | system($cmd); 30 | $cmd="$ROUGE -e ../data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -b 75 -a -m -s ROUGE-test.xml > ../sample-output/ROUGE-test-c95-2-1-U-r1000-n4-w1.2-b75-a-m-s.out"; 31 | print $cmd,"\n"; 32 | system($cmd); 33 | $cmd="$ROUGE -e ../data -3 HM -z SIMPLE DUC2002-BE-F.in.26.lst 26 > ../sample-output/DUC2002-BE-F.in.26.lst.out"; 34 | print $cmd,"\n"; 35 | system($cmd); 36 | $cmd="$ROUGE -e ../data -3 HM DUC2002-BE-F.in.26.simple.xml 26 > ../sample-output/DUC2002-BE-F.in.26.simple.out"; 37 | print $cmd,"\n"; 38 | system($cmd); 39 | $cmd="$ROUGE -e ../data -3 HM -z SIMPLE DUC2002-BE-L.in.26.lst 26 > ../sample-output/DUC2002-BE-L.in.26.lst.out"; 40 | print $cmd,"\n"; 41 | system($cmd); 42 | $cmd="$ROUGE -e ../data -3 HM DUC2002-BE-L.in.26.simple.xml 26 > ../sample-output/DUC2002-BE-L.in.26.simple.out"; 43 | print $cmd,"\n"; 44 | system($cmd); 45 | $cmd="$ROUGE -e ../data -n 4 -z SPL DUC2002-ROUGE.in.26.spl.lst 26 > ../sample-output/DUC2002-ROUGE.in.26.spl.lst.out"; 46 | print $cmd,"\n"; 47 | system($cmd); 48 | $cmd="$ROUGE -e ../data -n 4 DUC2002-ROUGE.in.26.spl.xml 26 > ../sample-output/DUC2002-ROUGE.in.26.spl.out"; 49 | print $cmd,"\n"; 50 | system($cmd); 51 | chdir($curdir); 52 | -------------------------------------------------------------------------------- /calRouge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import os 5 | import shutil 6 | import copy 7 | import time 8 | import codecs 9 | import datetime 10 | import logging 11 | import numpy as np 12 | import argparse 13 | 14 | from pyrouge import Rouge155 15 | 16 | _PYROUGE_PATH = os.environ['PYROUGE_HOME_DIR'] 17 | _PYROUGE_TEMP_PATH = os.environ.get('PYROUGE_TEMP_PATH', '.') 18 | 19 | class Mapping(): 20 | def __init__(self): 21 | 
class Mapping():
    """Map each distinct token to an integer id, rendered as a string.

    Ids are handed out in first-seen order starting at 0. Sharing one
    instance between a hypothesis and its references therefore maps equal
    tokens to equal ids.
    """

    def __init__(self):
        self._char2num = {}  # token -> integer id
        self.cnt = 0         # next id to hand out

    def tonum(self, x):
        """Return the id of token ``x`` as a string, allocating one if new."""
        if x not in self._char2num:
            self._char2num[x] = self.cnt
            self.cnt += 1
        return str(self._char2num[x])

    def charRemap(self, strlist):
        """Replace every space-separated token of ``strlist`` by its id.

        Sentences are separated by newlines and tokens by single spaces;
        both separators are kept in the returned string.
        """
        numlist = []
        for sent in strlist.split("\n"):
            numlist.append(" ".join([self.tonum(x) for x in sent.split(" ")]))
        return "\n".join(numlist)


def pyrouge_score(hypo_list, refer_list, language, level='char', convert=True, debug=False):
    """Score hypotheses against references with ROUGE-1.5.5 through pyrouge.

    :param hypo_list: list, each item is a (tokenized) string. Multiple
        sentences must be joined with a newline for ROUGE-L.
    :param refer_list: list of strings (one reference per hypothesis) or
        list of lists of strings (several references per hypothesis), in
        the same format as ``hypo_list``.
    :param language: for 'zh', 'ja' and 'ko' the text is split into
        characters when ``level == 'char'``. For every language other than
        'en' the tokens are replaced by integer ids before being written
        out for the ROUGE script.
    :param level: 'char' or 'word'; only consulted for 'zh', 'ja', 'ko'.
    :param convert: if true, return the compact dict built by
        ``convertFormat``; otherwise return pyrouge's raw result dict.
    :param debug: if true, print every hypothesis/reference pair.
    :return: dict of scores (see ``convert``).
    :raises ValueError: if there is nothing to score, or if a hypothesis
        has more references than the 'A'..'Z' file naming can express.
    """
    # Function-scope import: only this function needs tempfile.
    import tempfile

    if refer_list and isinstance(refer_list[0], str):  # single reference
        refer_list = [[refer] for refer in refer_list]
    assert len(hypo_list) == len(refer_list)
    if not hypo_list:
        raise ValueError('hypo_list and refer_list must not be empty')
    # Reference files are named Reference.<A-Z>.<id>.txt; a 27th reference
    # would get a name outside the pattern below.
    # NOTE(review): presumably pyrouge would then skip it silently -- confirm.
    if any(len(refers) > 26 for refers in refer_list):
        raise ValueError('at most 26 references per hypothesis are supported')

    if language in ('zh', 'ja', 'ko') and level == 'char':
        hypo_list = [str2char(ins, language) for ins in hypo_list]
        refer_list = [[str2char(refer, language) for refer in ins] for ins in refer_list]

    if debug:
        for hypo_dbg, refer_dbg in zip(hypo_list, refer_list):
            print('{}\t{}'.format(hypo_dbg, refer_dbg))

    # A timestamp alone is not unique (two calls within one second, or two
    # processes, would collide), so let mkdtemp add a unique suffix.
    nowTime = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    temp_root = _PYROUGE_TEMP_PATH or '.'
    os.makedirs(temp_root, exist_ok=True)
    PYROUGE_ROOT = tempfile.mkdtemp(prefix=nowTime + '_', dir=temp_root)

    try:
        SYSTEM_PATH = os.path.join(PYROUGE_ROOT, 'hypothesis')
        MODEL_PATH = os.path.join(PYROUGE_ROOT, 'reference')
        os.makedirs(SYSTEM_PATH)
        os.makedirs(MODEL_PATH)

        r = Rouge155(rouge_dir=_PYROUGE_PATH, log_level=logging.WARNING)
        r.system_dir = SYSTEM_PATH
        r.model_dir = MODEL_PATH
        r.system_filename_pattern = r'Model.(\d+).txt'
        r.model_filename_pattern = 'Reference.[A-Z].#ID#.txt'

        for i, (hypo, refer) in enumerate(zip(hypo_list, refer_list)):
            if language != 'en':
                # One mapping per sample so that a hypothesis and its
                # references share ids; references are mapped first.
                mapdict = Mapping()
                refer = [mapdict.charRemap(ref) for ref in refer]
                hypo = mapdict.charRemap(hypo)

            hypo_file = os.path.join(SYSTEM_PATH, 'Model.%d.txt' % i)
            with open(hypo_file, 'wb') as f:
                f.write(hypo.encode('utf-8'))

            for j, ref in enumerate(refer):
                refer_file = os.path.join(MODEL_PATH, "Reference.%s.%d.txt" % (chr(ord('A')+j), i))
                with open(refer_file, 'wb') as f:
                    f.write(ref.encode('utf-8'))

        output = r.convert_and_evaluate(rouge_args="-e %s/data -a -m -n 2 -d" % (_PYROUGE_PATH))
        output_dict = r.output_to_dict(output)
    finally:
        # Remove the scratch directory whether or not scoring succeeded.
        if os.path.isdir(PYROUGE_ROOT):
            shutil.rmtree(PYROUGE_ROOT)

    scores = convertFormat(output_dict) if convert else output_dict
    return scores


################# tools #################

# Per-language patterns; the capturing group makes re.split keep every
# matched character as its own element.
# NOTE(review): the 'ja' range ends at U+4E00, so kanji above that code
# point are not split into single characters -- confirm this is intended.
_CHAR_SPLIT_PATTERNS = {
    'zh': re.compile(u"([\u4e00-\u9fa5])"),
    'ja': re.compile(u"([\u0800-\u4e00])"),
    'ko': re.compile(u"([\uac00-\ud7ff])"),
}
# Fallback for any other language code: U+2E80..U+9FFF.
_CHAR_SPLIT_DEFAULT = re.compile(u"([\u2e80-\u9fff])")


def splitChars(sent, lang):
    """Split ``sent`` into tokens, one per character matched for ``lang``.

    Text between matched characters is kept as a single stripped,
    lower-cased token. Fragments that are empty after stripping (for
    example a run of spaces or a tab between two characters) are dropped,
    so no empty token is ever returned.
    """
    pattern = _CHAR_SPLIT_PATTERNS.get(lang, _CHAR_SPLIT_DEFAULT)
    stripped = (p.strip().lower() for p in pattern.split(sent))
    return [p for p in stripped if p]


def str2char(string, language='all'):
    """Apply ``splitChars`` to every newline-separated sentence of ``string``.

    Tokens are joined by single spaces and sentences by newlines.
    """
    sents = string.split("\n")
    tokens = [" ".join(splitChars(s, language)) for s in sents]
    return "\n".join(tokens)
def convertFormat(output_dict):
    """Convert pyrouge's flat result dict into a nested percentage dict.

    :param output_dict: dict with keys ``rouge_<t>_<metric>`` for ``t`` in
        1, 2, l and ``metric`` in precision, recall, f_score.
    :return: ``{'rouge-1': {'p': .., 'r': .., 'f': ..}, 'rouge-2': ..,
        'rouge-l': ..}`` with every value multiplied by 100.
    """
    scores = {}
    scores['rouge-1'], scores['rouge-2'], scores['rouge-l'] = {}, {}, {}
    fullname = {'p': 'precision', 'r': 'recall', 'f': 'f_score'}
    for t in ['1', '2', 'l']:
        for m in ['p', 'r', 'f']:
            scores['rouge-%s' % t][m] = output_dict['rouge_%s_%s' % (t, fullname[m])] * 100
    return scores


def rouge_results_to_str(results_dict):
    """Format pyrouge's flat result dict as three lines (F, R, P).

    Each line lists ROUGE-1/2/L for one metric as percentages with two
    decimals and ends with a newline.
    """
    rows = (('F', 'f_score'), ('R', 'recall'), ('P', 'precision'))
    return "".join(
        "ROUGE-{}(1/2/l): {:.2f}/{:.2f}/{:.2f}\n".format(
            label,
            *[results_dict["rouge_%s_%s" % (t, metric)] * 100 for t in ('1', '2', 'l')]
        )
        for label, metric in rows
    )


################# main #################

def _now():
    """Return the current local time formatted as HH:MM:SS."""
    return time.strftime('%H:%M:%S', time.localtime())


def _read_lines(path):
    """Read a UTF-8 file and return its lines stripped and lower-cased."""
    with codecs.open(path, encoding="utf-8") as f:
        return [line.strip().lower() for line in f]


def _split_sentences(line, seg, delimiter):
    """Join the sentences of ``line`` with newlines.

    The explicit ``delimiter`` is used when it is set and occurs in the
    line; otherwise the pySBD segmenter ``seg`` decides the boundaries.
    """
    if delimiter and delimiter in line:
        return "\n".join(line.split(delimiter))
    return "\n".join(seg.segment(line))


def _tokenize_doc(tok, doc):
    """Tokenize every newline-separated sentence of ``doc`` with Moses.

    Sentences are tokenized one by one so the newline separators survive.
    ``escape=False`` keeps characters such as the apostrophe literal
    instead of turning them into HTML entities.
    """
    return "\n".join(
        tok.tokenize(sent, return_str=True, escape=False) for sent in doc.split("\n")
    )


def main():
    """Command-line entry point: read both files, score them, print ROUGE."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', type=str, default="candidate.txt", help='candidate file')
    parser.add_argument('-r', type=str, default="reference.txt", help='reference file')
    parser.add_argument('-l', type=str, default="en", help='language')
    parser.add_argument('-d', type=str, default="", help='delimiter')
    parser.add_argument('-t', action='store_true', help='need to tokenize the original document')
    parser.add_argument('-v', action='store_true', help='print detailed information')
    args = parser.parse_args()
    print(args)

    # Read inside a context manager so both files are closed again.
    candidates = _read_lines(args.c)
    references = _read_lines(args.r)

    try:
        from pysbd import Segmenter
    except ImportError as err:
        raise ImportError('Please install pySBD splitor with: pip install pysbd') from err
    try:
        seg = Segmenter(language=args.l, clean=False)
    except ValueError:
        print("Unknown language code. Use language=en for segmentation.")
        seg = Segmenter(language='en', clean=False)

    print("Split sentences by pySBD\t\t" + _now())
    references = [_split_sentences(line, seg, args.d) for line in references]
    candidates = [_split_sentences(line, seg, args.d) for line in candidates]

    if args.t:
        try:
            from sacremoses import MosesTokenizer
        except ImportError as err:
            raise ImportError('Please install Moses tokenizer with: pip install sacremoses') from err
        print("Tokenize string by sacremoses\t\t" + _now())
        tok = MosesTokenizer(lang=args.l)
        # Store the tokenized text back into the lists that are scored;
        # otherwise the -t flag has no effect on the result.
        candidates = [_tokenize_doc(tok, e) for e in candidates]
        references = [_tokenize_doc(tok, e) for e in references]

    print("candidate: %d, reference: %d\t%s" % (len(candidates), len(references), _now()))

    if len(candidates) != len(references):
        raise ValueError(
            "candidate and reference files must have the same number of lines "
            "(%d != %d)" % (len(candidates), len(references))
        )
    results_dict = pyrouge_score(candidates, references, args.l, convert=False, debug=args.v)
    print(rouge_results_to_str(results_dict))
    print(_now())


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- 1 | Surface将装载Windows 2 | 服务员失手关灯男子冲动之下打砸出气 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sacremoses 2 | pysbd 3 | git+https://github.com/bheinzerling/pyrouge 4 | --------------------------------------------------------------------------------