├── .gitignore ├── LICENSE ├── README.md ├── client.py ├── corenlp.py ├── default.properties ├── jsonrpc.py └── progressbar.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /stanford-corenlp-* 3 | .*project 4 | 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python interface to Stanford Core NLP tools v3.4.1 2 | 3 | This is a Python wrapper for Stanford University's NLP group's Java-based [CoreNLP tools](http://nlp.stanford.edu/software/corenlp.shtml). It can either be imported as a module or run as a JSON-RPC server. Because it uses many large trained models (requiring 3GB RAM on 64-bit machines and usually a few minutes loading time), most applications will probably want to run it as a server. 
4 | 5 | 6 | * Python interface to Stanford CoreNLP tools: tagging, phrase-structure parsing, dependency parsing, [named-entity recognition](http://en.wikipedia.org/wiki/Named-entity_recognition), and [coreference resolution](http://en.wikipedia.org/wiki/Coreference). 7 | * Runs a JSON-RPC server that wraps the Java server and outputs JSON. 8 | * Outputs parse trees which can be used by [nltk](http://nltk.googlecode.com/svn/trunk/doc/howto/tree.html). 9 | 10 | 11 | It depends on [pexpect](http://www.noah.org/wiki/pexpect) and includes and uses code from [jsonrpc](http://www.simple-is-better.org/rpc/) and [python-progressbar](http://code.google.com/p/python-progressbar/). 12 | 13 | It runs the Stanford CoreNLP jar in a separate process, communicates with the java process using its command-line interface, and makes assumptions about the output of the parser in order to parse it into a Python dict object and transfer it using JSON. The parser will break if the output changes significantly, but it has been tested on **Core NLP tools version 3.4.1** released 2014-08-27. 14 | 15 | ## Download and Usage 16 | 17 | To use this program you must [download](http://nlp.stanford.edu/software/corenlp.shtml#Download) and unpack the compressed file containing Stanford's CoreNLP package. By default, `corenlp.py` looks for the Stanford Core NLP folder as a subdirectory of where the script is being run. In other words: 18 | 19 | sudo pip install pexpect unidecode 20 | git clone git://github.com/dasmith/stanford-corenlp-python.git 21 | cd stanford-corenlp-python 22 | wget http://nlp.stanford.edu/software/stanford-corenlp-full-2014-08-27.zip 23 | unzip stanford-corenlp-full-2014-08-27.zip 24 | 25 | Then launch the server: 26 | 27 | python corenlp.py 28 | 29 | Optionally, you can specify a host or port: 30 | 31 | python corenlp.py -H 0.0.0.0 -p 3456 32 | 33 | That will run a public JSON-RPC server on port 3456. 
34 | 35 | Assuming you are running on port 8080, the code in `client.py` shows an example parse: 36 | 37 | import jsonrpc 38 | from simplejson import loads 39 | server = jsonrpc.ServerProxy(jsonrpc.JsonRpc20(), 40 | jsonrpc.TransportTcpIp(addr=("127.0.0.1", 8080))) 41 | 42 | result = loads(server.parse("Hello world. It is so beautiful")) 43 | print "Result", result 44 | 45 | That returns a dictionary containing the keys `sentences` and `coref`. The key `sentences` contains a list of dictionaries for each sentence, which contain `parsetree`, `text`, `tuples` containing the dependencies, and `words`, containing information about parts of speech, recognized named-entities, etc: 46 | 47 | {u'sentences': [{u'parsetree': u'(ROOT (S (VP (NP (INTJ (UH Hello)) (NP (NN world)))) (. !)))', 48 | u'text': u'Hello world!', 49 | u'tuples': [[u'dep', u'world', u'Hello'], 50 | [u'root', u'ROOT', u'world']], 51 | u'words': [[u'Hello', 52 | {u'CharacterOffsetBegin': u'0', 53 | u'CharacterOffsetEnd': u'5', 54 | u'Lemma': u'hello', 55 | u'NamedEntityTag': u'O', 56 | u'PartOfSpeech': u'UH'}], 57 | [u'world', 58 | {u'CharacterOffsetBegin': u'6', 59 | u'CharacterOffsetEnd': u'11', 60 | u'Lemma': u'world', 61 | u'NamedEntityTag': u'O', 62 | u'PartOfSpeech': u'NN'}], 63 | [u'!', 64 | {u'CharacterOffsetBegin': u'11', 65 | u'CharacterOffsetEnd': u'12', 66 | u'Lemma': u'!', 67 | u'NamedEntityTag': u'O', 68 | u'PartOfSpeech': u'.'}]]}, 69 | {u'parsetree': u'(ROOT (S (NP (PRP It)) (VP (VBZ is) (ADJP (RB so) (JJ beautiful))) (. 
.)))', 70 | u'text': u'It is so beautiful.', 71 | u'tuples': [[u'nsubj', u'beautiful', u'It'], 72 | [u'cop', u'beautiful', u'is'], 73 | [u'advmod', u'beautiful', u'so'], 74 | [u'root', u'ROOT', u'beautiful']], 75 | u'words': [[u'It', 76 | {u'CharacterOffsetBegin': u'14', 77 | u'CharacterOffsetEnd': u'16', 78 | u'Lemma': u'it', 79 | u'NamedEntityTag': u'O', 80 | u'PartOfSpeech': u'PRP'}], 81 | [u'is', 82 | {u'CharacterOffsetBegin': u'17', 83 | u'CharacterOffsetEnd': u'19', 84 | u'Lemma': u'be', 85 | u'NamedEntityTag': u'O', 86 | u'PartOfSpeech': u'VBZ'}], 87 | [u'so', 88 | {u'CharacterOffsetBegin': u'20', 89 | u'CharacterOffsetEnd': u'22', 90 | u'Lemma': u'so', 91 | u'NamedEntityTag': u'O', 92 | u'PartOfSpeech': u'RB'}], 93 | [u'beautiful', 94 | {u'CharacterOffsetBegin': u'23', 95 | u'CharacterOffsetEnd': u'32', 96 | u'Lemma': u'beautiful', 97 | u'NamedEntityTag': u'O', 98 | u'PartOfSpeech': u'JJ'}], 99 | [u'.', 100 | {u'CharacterOffsetBegin': u'32', 101 | u'CharacterOffsetEnd': u'33', 102 | u'Lemma': u'.', 103 | u'NamedEntityTag': u'O', 104 | u'PartOfSpeech': u'.'}]]}], 105 | u'coref': [[[[u'It', 1, 0, 0, 1], [u'Hello world', 0, 1, 0, 2]]]]} 106 | 107 | To use it in a regular script (useful for debugging), load the module instead: 108 | 109 | from corenlp import * 110 | corenlp = StanfordCoreNLP() # wait a few minutes... 111 | corenlp.parse("Parse this sentence.") 112 | 113 | The server, `StanfordCoreNLP()`, takes an optional argument `corenlp_path` which specifies the path to the jar files. The default value is `StanfordCoreNLP(corenlp_path="./stanford-corenlp-full-2014-08-27/")`. 114 | 115 | ## Coreference Resolution 116 | 117 | The library supports [coreference resolution](http://en.wikipedia.org/wiki/Coreference), which means pronouns can be "dereferenced." If an entry in the `coref` list is `[u'Hello world', 0, 1, 0, 2]`, the numbers mean: 118 | 119 | * 0 = The reference appears in the 0th sentence (e.g. 
"Hello world") 120 | * 1 = The 2nd token, "world", is the [headword](http://en.wikipedia.org/wiki/Head_%28linguistics%29) of that sentence 121 | * 0 = 'Hello world' begins at the 0th token in the sentence 122 | * 2 = 'Hello world' ends before the 2nd token in the sentence. 123 | 124 | 135 | 136 | 137 | ## Questions 138 | 139 | **Stanford CoreNLP tools require a large amount of free memory**. Java 5+ uses about 50% more RAM on 64-bit machines than 32-bit machines. 32-bit machine users can lower the memory requirements by changing `-Xmx3g` to `-Xmx2g` or even less. 140 | If pexpect times out while loading models, check to make sure you have enough memory and can run the server alone without your kernel killing the java process: 141 | 142 | java -cp stanford-corenlp-3.4.1.jar:stanford-corenlp-3.4.1-models.jar:xom.jar:joda-time.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -props default.properties 143 | 144 | You can reach me, Dustin Smith, by sending a message on GitHub or through email (contact information is available [on my webpage](http://web.media.mit.edu/~dustin)). 145 | 146 | 147 | # License & Contributors 148 | 149 | This is free and open source software and has benefited from the contribution and feedback of others. Like Stanford's CoreNLP tools, it is covered under the [GNU General Public License v2 +](http://www.gnu.org/licenses/gpl-2.0.html), which in short means that modifications to this program must maintain the same free and open source distribution policy. 150 | 151 | I gratefully welcome bug fixes and new features. If you have forked this repository, please submit a [pull request](https://help.github.com/articles/using-pull-requests/) so others can benefit from your contributions. 
import json
from jsonrpc import ServerProxy, JsonRpc20, TransportTcpIp
from pprint import pprint


class StanfordNLP:
    """Minimal JSON-RPC client that forwards text to a ``parse`` RPC method.

    The defaults reproduce the previously hard-coded endpoint
    (127.0.0.1:8080), so ``StanfordNLP()`` behaves exactly as before.
    """

    def __init__(self, host="127.0.0.1", port=8080):
        """Create the proxy for the server listening on ``host``:``port``."""
        self.server = ServerProxy(JsonRpc20(),
                                  TransportTcpIp(addr=(host, port)))

    def parse(self, text):
        """Send ``text`` to the server and return the decoded JSON reply."""
        return json.loads(self.server.parse(text))


nlp = StanfordNLP()
result = nlp.parse("Hello world! It is so beautiful.")
pprint(result)

from nltk.tree import Tree
# NLTK 3 renamed Tree.parse to Tree.fromstring; prefer the new name and only
# fall back to the old one on NLTK 2 installations.
build_tree = getattr(Tree, "fromstring", None) or Tree.parse
tree = build_tree(result['sentences'][0]['parsetree'])
pprint(tree)
# States of the line-oriented parser in parse_parser_results(), in the order
# the corresponding sections are consumed.
STATE_START, STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY, STATE_COREFERENCE = 0, 1, 2, 3, 4, 5
# One bracketed token description, e.g. "[Text=Hello PartOfSpeech=UH]".
WORD_PATTERN = re.compile(r'\[([^\]]+)\]')
# One coreference link:
#   (sent,head,[start,end]) -> (sent,head,[start,end]), that is: "src" -> "sink"
# Every number is captured whole; the former "(\d)*" kept only the last digit
# of the head index, corrupting heads at position 10 or later.
CR_PATTERN = re.compile(r"\((\d+),(\d+),\[(\d+),(\d+)\]\) -> \((\d+),(\d+),\[(\d+),(\d+)\]\), that is: \"(.*)\" -> \"(.*)\"")


def remove_id(word):
    """Removes the numeric suffix from the parsed recognized words: e.g. 'word-2' > 'word'

    Only the part after the last "-" is dropped, so hyphenated words keep
    their inner hyphens ('well-known-3' > 'well-known').  Strings without a
    "-" (including the empty string) are returned unchanged.
    """
    return word.rsplit("-", 1)[0]


def parse_bracketed(s):
    '''Parse word features [abc=... def = ...]
    Also manages to parse out features that have XML within them

    Returns a ``(word, attrs)`` tuple: ``word`` is the value of the ``Text``
    feature (``None`` if absent) and ``attrs`` maps every other feature name
    to its value.
    '''
    word = None
    attrs = {}
    placeholders = {}
    # Substitute XML tags, to replace them later: they contain "=" and
    # whitespace, which would confuse the key=value regex below.
    for i, tag in enumerate(re.findall(r"(<[^<>]+>.*<\/[^<>]+>)", s)):
        marker = "^^^%d^^^" % i
        placeholders[marker] = tag
        s = s.replace(tag, marker)
    # Load key-value pairs, substituting as necessary
    for attr, val in re.findall(r"([^=\s]*)=([^=\s]*)", s):
        val = placeholders.get(val, val)
        if attr == 'Text':
            word = val
        else:
            attrs[attr] = val
    return (word, attrs)


def parse_parser_results(text):
    """ This is the nasty bit of code to interact with the command-line
    interface of the CoreNLP tools.  Takes a string of the parser results
    and then returns a Python list of dictionaries, one for each parsed
    sentence.

    ``text`` may be a text string or UTF-8 encoded bytes.  The result is a
    dict with a "sentences" list (each entry has 'text', 'words',
    'parsetree' and 'dependencies') and, when coreference links are
    present, a "coref" list of coreference sets.  Indices in coreference
    tuples are converted from 1-based to 0-based.

    Raises ``Exception`` if the line following a sentence's text does not
    start with "[Text=".
    """
    # Work on text throughout: decode bytes once at the boundary instead of
    # encoding, which broke on non-ASCII byte strings.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    results = {"sentences": []}
    state = STATE_START
    coref_set = None
    for line in text.split("\n"):
        line = line.strip()

        if line.startswith("Sentence #"):
            sentence = {'words': [], 'parsetree': [], 'dependencies': []}
            results["sentences"].append(sentence)
            state = STATE_TEXT

        elif state == STATE_TEXT:
            sentence['text'] = line
            state = STATE_WORDS

        elif state == STATE_WORDS:
            if not line.startswith("[Text="):
                raise Exception('Parse error. Could not find "[Text=" in: %s' % line)
            for s in WORD_PATTERN.findall(line):
                sentence['words'].append(parse_bracketed(s))
            state = STATE_TREE

        elif state == STATE_TREE:
            if len(line) == 0:
                # A blank line terminates the (multi-line) parse tree.
                state = STATE_DEPENDENCY
                sentence['parsetree'] = " ".join(sentence['parsetree'])
            else:
                sentence['parsetree'].append(line)

        elif state == STATE_DEPENDENCY:
            if len(line) == 0:
                state = STATE_COREFERENCE
            else:
                # "rel(left-1, right-2)" -> ["rel", "left-1", "right-2"]
                split_entry = re.split(r"\(|, ", line[:-1])
                if len(split_entry) == 3:
                    rel, left, right = [remove_id(part) for part in split_entry]
                    sentence['dependencies'].append((rel, left, right))

        elif state == STATE_COREFERENCE:
            if "Coreference set" in line:
                coref_set = []
                results.setdefault('coref', []).append(coref_set)
            else:
                for match in CR_PATTERN.findall(line):
                    if coref_set is None:
                        # Link seen before any "Coreference set" header:
                        # open a set rather than fail on an unbound name.
                        coref_set = []
                        results.setdefault('coref', []).append(coref_set)
                    src_i, src_pos, src_l, src_r = [int(n) - 1 for n in match[0:4]]
                    sink_i, sink_pos, sink_l, sink_r = [int(n) - 1 for n in match[4:8]]
                    src_word, sink_word = match[8], match[9]
                    coref_set.append(((src_word, src_i, src_pos, src_l, src_r),
                                      (sink_word, sink_i, sink_pos, sink_l, sink_r)))

    # If the output ended (or a new sentence began) before the blank line that
    # closes a tree, the tree is still a list of lines; always return a string.
    for parsed in results["sentences"]:
        if isinstance(parsed['parsetree'], list):
            parsed['parsetree'] = " ".join(parsed['parsetree'])

    return results
def _parse(self, text):
    """Send *text* to the CoreNLP shell and return the parsed result.

    This is the core interaction with the parser.  It returns a Python
    data-structure, while the parse() function returns a JSON object.

    :param text: the text to analyse; it is sent to the shell with sendline()
    :returns: the dictionary built by parse_parser_results(), or
        ``{'error': ...}`` when no complete answer arrived in time
    :raises Exception: anything raised by parse_parser_results() is logged
        (when VERBOSE) and re-raised with its original traceback
    """
    # clean up anything leftover from a previous request
    while True:
        try:
            self.corenlp.read_nonblocking(4000, 0.3)
        except pexpect.TIMEOUT:
            break

    self.corenlp.sendline(text)

    # How much time should we give the parser to parse it?
    # The timeout grows with the text's length and is capped at 40 seconds.
    # Anything longer than 5 seconds requires that you also
    # increase timeout=5 in jsonrpc.py
    max_expected_time = min(40, 3 + len(text) / 20.0)
    end_time = time.time() + max_expected_time

    incoming = ""
    while True:
        try:
            incoming += self.corenlp.read_nonblocking(2000, 1)
            if "\nNLP>" in incoming:
                # the shell prompt marks the end of the answer
                break
            time.sleep(0.0001)
        except pexpect.TIMEOUT:
            # nothing to read right now: give up only once the budget is spent
            if end_time - time.time() < 0:
                logger.error("Error: Timeout with input '%s'" % (incoming))
                return {'error': "timed out after %f seconds" % max_expected_time}
        except pexpect.EOF:
            break

    if VERBOSE:
        logger.debug("%s\n%s" % ('='*40, incoming))
    try:
        results = parse_parser_results(incoming)
    except Exception:
        if VERBOSE:
            logger.debug(traceback.format_exc())
        # bare ``raise`` keeps the original traceback; ``raise e`` restarted
        # it at this line (and ``except X, e`` is Python 2-only syntax)
        raise

    return results
if __name__ == '__main__':
    # Script entry point: expose StanfordCoreNLP.parse through a JSON-RPC
    # server listening on the host/port given on the command line.
    parser = optparse.OptionParser(usage="%prog [OPTIONS]")
    parser.add_option(
        '-p', '--port',
        default='8080',
        help='Port to serve on (default: 8080)')
    parser.add_option(
        '-H', '--host',
        default='127.0.0.1',
        help='Host to serve on (default: 127.0.0.1. Use 0.0.0.0 to make public)')
    options, args = parser.parse_args()

    serializer = jsonrpc.JsonRpc20()
    transport = jsonrpc.TransportTcpIp(addr=(options.host, int(options.port)))
    server = jsonrpc.Server(serializer, transport)

    # loading the CoreNLP models happens here, before the server starts
    nlp = StanfordCoreNLP()
    server.register_function(nlp.parse)

    logger.info('Serving on http://%s:%s' % (options.host, options.port))
    server.serve()

", "

" etc. 11 | #Will have no effect if the "cleanxml" annotator is used 12 | #ssplit.htmlBoundariesToDiscard = p,text 13 | 14 | # 15 | # None of these paths are necessary anymore: we load all models from the JAR file 16 | # 17 | 18 | #pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-left3words/left3words-distsim-wsj-0-18.tagger 19 | ## slightly better model but much slower: 20 | ##pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-bidirectional/bidirectional-distsim-wsj-0-18.tagger 21 | 22 | #ner.model.3class = /u/nlp/data/ner/goodClassifiers/all.3class.distsim.crf.ser.gz 23 | #ner.model.7class = /u/nlp/data/ner/goodClassifiers/muc.distsim.crf.ser.gz 24 | #ner.model.MISCclass = /u/nlp/data/ner/goodClassifiers/conll.distsim.crf.ser.gz 25 | 26 | #regexner.mapping = /u/nlp/data/TAC-KBP2010/sentence_extraction/type_map_clean 27 | #regexner.ignorecase = false 28 | 29 | #nfl.gazetteer = /scr/nlp/data/machine-reading/Machine_Reading_P1_Reading_Task_V2.0/data/SportsDomain/NFLScoring_UseCase/NFLgazetteer.txt 30 | #nfl.relation.model = /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_relation_model.ser 31 | #nfl.entity.model = /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_entity_model.ser 32 | #printable.relation.beam = 20 33 | 34 | #parser.model = /u/nlp/data/lexparser/englishPCFG.ser.gz 35 | 36 | #srl.verb.args=/u/kristina/srl/verbs.core_args 37 | #srl.model.cls=/u/nlp/data/srl/trainedModels/englishPCFG/cls/train.ann 38 | #srl.model.id=/u/nlp/data/srl/trainedModels/englishPCFG/id/train.ann 39 | 40 | #coref.model=/u/nlp/rte/resources/anno/coref/corefClassifierAll.March2009.ser.gz 41 | #coref.name.dir=/u/nlp/data/coref/ 42 | #wordnet.dir=/u/nlp/data/wordnet/wordnet-3.0-prolog 43 | 44 | #dcoref.demonym = /scr/heeyoung/demonyms.txt 45 | #dcoref.animate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/animate.unigrams.txt 46 | #dcoref.inanimate = 
/scr/nlp/data/DekangLin-Animacy-Gender/Animacy/inanimate.unigrams.txt 47 | #dcoref.male = /scr/nlp/data/Bergsma-Gender/male.unigrams.txt 48 | #dcoref.neutral = /scr/nlp/data/Bergsma-Gender/neutral.unigrams.txt 49 | #dcoref.female = /scr/nlp/data/Bergsma-Gender/female.unigrams.txt 50 | #dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt 51 | #dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt 52 | 53 | 54 | # This is the regular expression that describes which xml tags to keep 55 | # the text from. In order to turn the xml removal on or off, add cleanxml 56 | # to (or remove it from) the list of annotators above after "tokenize". 57 | #clean.xmltags = .* 58 | # A set of tags which will force the end of a sentence. HTML example: 59 | # you would not want to end on <i>, but you would want to end on

. 60 | # Once again, a regular expression. 61 | # (Blank means there are no sentence enders.) 62 | #clean.sentenceendingtags = 63 | # Whether or not to allow malformed xml 64 | # StanfordCoreNLP.properties 65 | #wordnet.dir=models/wordnet-3.0-prolog 66 | -------------------------------------------------------------------------------- /jsonrpc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: ascii -*- 3 | """ 4 | JSON-RPC (remote procedure call). 5 | 6 | It consists of 3 (independent) parts: 7 | - proxy/dispatcher 8 | - data structure / serializer 9 | - transport 10 | 11 | It's intended for JSON-RPC, but since the above 3 parts are independent, 12 | it could be used for other RPCs as well. 13 | 14 | Currently, JSON-RPC 2.0(pre) and JSON-RPC 1.0 are implemented 15 | 16 | :Version: 2008-08-31-beta 17 | :Status: experimental 18 | 19 | :Example: 20 | simple Client with JsonRPC2.0 and TCP/IP:: 21 | 22 | >>> proxy = ServerProxy( JsonRpc20(), TransportTcpIp(addr=("127.0.0.1",31415)) ) 23 | >>> proxy.echo( "hello world" ) 24 | u'hello world' 25 | >>> proxy.echo( "bye." ) 26 | u'bye.' 27 | 28 | simple Server with JsonRPC2.0 and TCP/IP with logging to STDOUT:: 29 | 30 | >>> server = Server( JsonRpc20(), TransportTcpIp(addr=("127.0.0.1",31415), logfunc=log_stdout) ) 31 | >>> def echo( s ): 32 | ... return s 33 | >>> server.register_function( echo ) 34 | >>> server.serve( 2 ) # serve 2 requests # doctest: +ELLIPSIS 35 | listen ('127.0.0.1', 31415) 36 | ('127.0.0.1', ...) connected 37 | ('127.0.0.1', ...) <-- {"jsonrpc": "2.0", "method": "echo", "params": ["hello world"], "id": 0} 38 | ('127.0.0.1', ...) --> {"jsonrpc": "2.0", "result": "hello world", "id": 0} 39 | ('127.0.0.1', ...) close 40 | ('127.0.0.1', ...) connected 41 | ('127.0.0.1', ...) <-- {"jsonrpc": "2.0", "method": "echo", "params": ["bye."], "id": 0} 42 | ('127.0.0.1', ...) 
--> {"jsonrpc": "2.0", "result": "bye.", "id": 0} 43 | ('127.0.0.1', ...) close 44 | close ('127.0.0.1', 31415) 45 | 46 | Client with JsonRPC2.0 and an abstract Unix Domain Socket:: 47 | 48 | >>> proxy = ServerProxy( JsonRpc20(), TransportUnixSocket(addr="\\x00.rpcsocket") ) 49 | >>> proxy.hi( message="hello" ) #named parameters 50 | u'hi there' 51 | >>> proxy.test() #fault 52 | Traceback (most recent call last): 53 | ... 54 | jsonrpc.RPCMethodNotFound: 55 | >>> proxy.debug.echo( "hello world" ) #hierarchical procedures 56 | u'hello world' 57 | 58 | Server with JsonRPC2.0 and abstract Unix Domain Socket with a logfile:: 59 | 60 | >>> server = Server( JsonRpc20(), TransportUnixSocket(addr="\\x00.rpcsocket", logfunc=log_file("mylog.txt")) ) 61 | >>> def echo( s ): 62 | ... return s 63 | >>> def hi( message ): 64 | ... return "hi there" 65 | >>> server.register_function( hi ) 66 | >>> server.register_function( echo, name="debug.echo" ) 67 | >>> server.serve( 3 ) # serve 3 requests 68 | 69 | "mylog.txt" then contains: 70 | listen '\\x00.rpcsocket' 71 | '' connected 72 | '' --> '{"jsonrpc": "2.0", "method": "hi", "params": {"message": "hello"}, "id": 0}' 73 | '' <-- '{"jsonrpc": "2.0", "result": "hi there", "id": 0}' 74 | '' close 75 | '' connected 76 | '' --> '{"jsonrpc": "2.0", "method": "test", "id": 0}' 77 | '' <-- '{"jsonrpc": "2.0", "error": {"code":-32601, "message": "Method not found."}, "id": 0}' 78 | '' close 79 | '' connected 80 | '' --> '{"jsonrpc": "2.0", "method": "debug.echo", "params": ["hello world"], "id": 0}' 81 | '' <-- '{"jsonrpc": "2.0", "result": "hello world", "id": 0}' 82 | '' close 83 | close '\\x00.rpcsocket' 84 | 85 | :Note: all exceptions derived from RPCFault are propagated to the client. 86 | other exceptions are logged and result in a sent-back "empty" INTERNAL_ERROR. 87 | :Uses: simplejson, socket, sys,time,codecs 88 | :SeeAlso: JSON-RPC 2.0 proposal, 1.0 specification 89 | :Warning: 90 | .. Warning:: 91 | This is **experimental** code! 
92 | :Bug: 93 | 94 | :Author: Roland Koebler (rk(at)simple-is-better.org) 95 | :Copyright: 2007-2008 by Roland Koebler (rk(at)simple-is-better.org) 96 | :License: see __license__ 97 | :Changelog: 98 | - 2008-08-31: 1st release 99 | 100 | TODO: 101 | - server: multithreading rpc-server 102 | - client: multicall (send several requests) 103 | - transport: SSL sockets, maybe HTTP, HTTPS 104 | - types: support for date/time (ISO 8601) 105 | - errors: maybe customizable error-codes/exceptions 106 | - mixed 1.0/2.0 server ? 107 | - system description etc. ? 108 | - maybe test other json-serializers, like cjson? 109 | """ 110 | 111 | __version__ = "2008-08-31-beta" 112 | __author__ = "Roland Koebler " 113 | __license__ = """Copyright (c) 2007-2008 by Roland Koebler (rk(at)simple-is-better.org) 114 | 115 | Permission is hereby granted, free of charge, to any person obtaining 116 | a copy of this software and associated documentation files (the 117 | "Software"), to deal in the Software without restriction, including 118 | without limitation the rights to use, copy, modify, merge, publish, 119 | distribute, sublicense, and/or sell copies of the Software, and to 120 | permit persons to whom the Software is furnished to do so, subject to 121 | the following conditions: 122 | 123 | The above copyright notice and this permission notice shall be included 124 | in all copies or substantial portions of the Software. 125 | 126 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 127 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 128 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class RPCFault(RPCError):
    """RPC error/fault package received.

    This exception can also be used as a class, to generate a
    RPC-error/fault message.

    :Variables:
        - error_code:    the RPC error-code
        - error_message: description of the error
        - error_data:    optional additional information
                         (must be json-serializable)
    :TODO: improve __str__
    """
    def __init__(self, error_code, error_message, error_data=None):
        RPCError.__init__(self)
        self.error_code = error_code
        self.error_message = error_message
        self.error_data = error_data

    def __str__(self):
        return repr(self)

    def __repr__(self):
        # The format string needs one placeholder per argument: with an
        # empty format string the %-operator raises
        # "TypeError: not all arguments converted during string formatting",
        # which made every str()/repr() of a fault fail.
        return "<RPCFault %s: %s (%s)>" % (
            self.error_code, repr(self.error_message), repr(self.error_data))
def dictkeyclean(d):
    """Convert all keys of the dict 'd' to (ascii-)strings.

    :Parameters:
        - d: the dict to convert; it is left unmodified
    :Returns: a new dict that maps ``str(key)`` to the original value
    :Raises: UnicodeEncodeError (from ``str(key)``)
    """
    # items() instead of iteritems(): iteritems() only exists on Python 2,
    # items() works on Python 2 and 3 alike.
    return dict((str(key), value) for key, value in d.items())
def dumps_request( self, method, params=(), id=0 ):
    """Serialize a JSON-RPC 1.0 Request.

    :Parameters:
        - method: name of the remote method (str/unicode)
        - params: positional parameters (list/tuple)
        - id:     request id; it is serialized as given, so None
                  is written as null
    :Returns:   | {"method": "...", "params": ..., "id": ...}
                | the three members always appear in this order.
    :Raises:    TypeError if method/params is of wrong type or
                not JSON-serializable
    """
    method_is_text = isinstance(method, (str, unicode))
    if not method_is_text:
        raise TypeError('"method" must be a string (or unicode string).')
    params_is_sequence = isinstance(params, (tuple, list))
    if not params_is_sequence:
        raise TypeError("params must be a tuple/list.")

    # only the variable parts go through the configured encoder
    encoded = (self.dumps(method), self.dumps(params), self.dumps(id))
    return '{"method": %s, "params": %s, "id": %s}' % encoded
def dumps_response( self, result, id=None ):
    """Serialize a successful JSON-RPC 1.0 Response.

    :Parameters:
        - result: the value to return to the caller
        - id:     the id of the request being answered
    :Returns:   | {"result": ..., "error": null, "id": ...}
                | "result", "error" and "id" always appear in this order.
    :Raises:    TypeError if not JSON-serializable
    """
    encoded_result = self.dumps(result)
    encoded_id = self.dumps(id)
    # "error" is always null here; faults are written by dumps_error
    return '{"result": %s, "error": null, "id": %s}' % (encoded_result, encoded_id)
def loads_request( self, string ):
    """de-serialize a JSON-RPC Request/Notification

    :Returns:   | (method_name, params, id) for a Request or
                | (method_name, params) for a Notification
                | params is a tuple/list
                | a missing or null "id" makes this a Notification
    :Raises:    RPCParseError, RPCInvalidRPC
    """
    try:
        data = self.loads(string)
    except ValueError as err:   # "as" works on Python 2.6+ and Python 3
        raise RPCParseError("No valid JSON. (%s)" % str(err))
    if not isinstance(data, dict):
        raise RPCInvalidRPC("No valid RPC-package.")
    if "method" not in data:
        raise RPCInvalidRPC("""Invalid Request, "method" is missing.""")
    if not isinstance(data["method"], (str, unicode)):
        raise RPCInvalidRPC("""Invalid Request, "method" must be a string.""")
    if "id" not in data:
        data["id"] = None       #be liberal
    if "params" not in data:
        data["params"] = ()     #be liberal
    if not isinstance(data["params"], (list, tuple)):
        raise RPCInvalidRPC("""Invalid Request, "params" must be an array.""")
    # with the defaults filled in, exactly method/params/id may be present
    if len(data) != 3:
        raise RPCInvalidRPC("""Invalid Request, additional fields found.""")

    # notification / request
    if data["id"] is None:
        return data["method"], data["params"]               #notification
    return data["method"], data["params"], data["id"]       #request
(%s)" % str(err)) 406 | if not isinstance(data, dict): raise RPCInvalidRPC("No valid RPC-package.") 407 | if "id" not in data: raise RPCInvalidRPC("""Invalid Response, "id" missing.""") 408 | if "result" not in data: data["result"] = None #be liberal 409 | if "error" not in data: data["error"] = None #be liberal 410 | if len(data) != 3: raise RPCInvalidRPC("""Invalid Response, additional or missing fields.""") 411 | 412 | #error 413 | if data["error"] is not None: 414 | if data["result"] is not None: 415 | raise RPCInvalidRPC("""Invalid Response, one of "result" or "error" must be null.""") 416 | #v2.0 error-format 417 | if( isinstance(data["error"], dict) and "code" in data["error"] and "message" in data["error"] and 418 | (len(data["error"])==2 or ("data" in data["error"] and len(data["error"])==3)) ): 419 | if "data" not in data["error"]: 420 | error_data = None 421 | else: 422 | error_data = data["error"]["data"] 423 | 424 | if data["error"]["code"] == PARSE_ERROR: 425 | raise RPCParseError(error_data) 426 | elif data["error"]["code"] == INVALID_REQUEST: 427 | raise RPCInvalidRPC(error_data) 428 | elif data["error"]["code"] == METHOD_NOT_FOUND: 429 | raise RPCMethodNotFound(error_data) 430 | elif data["error"]["code"] == INVALID_METHOD_PARAMS: 431 | raise RPCInvalidMethodParams(error_data) 432 | elif data["error"]["code"] == INTERNAL_ERROR: 433 | raise RPCInternalError(error_data) 434 | elif data["error"]["code"] == PROCEDURE_EXCEPTION: 435 | raise RPCProcedureException(error_data) 436 | elif data["error"]["code"] == AUTHENTIFICATION_ERROR: 437 | raise RPCAuthentificationError(error_data) 438 | elif data["error"]["code"] == PERMISSION_DENIED: 439 | raise RPCPermissionDenied(error_data) 440 | elif data["error"]["code"] == INVALID_PARAM_VALUES: 441 | raise RPCInvalidParamValues(error_data) 442 | else: 443 | raise RPCFault(data["error"]["code"], data["error"]["message"], error_data) 444 | #other error-format 445 | else: 446 | raise RPCFault(-1, "Error", 
data["error"]) 447 | #result 448 | else: 449 | return data["result"], data["id"] 450 | 451 | #---------------------- 452 | # JSON-RPC 2.0 453 | 454 | class JsonRpc20: 455 | """JSON-RPC V2.0 data-structure / serializer 456 | 457 | :SeeAlso: JSON-RPC 2.0 specification 458 | :TODO: catch simplejson.dumps not-serializable-exceptions 459 | """ 460 | def __init__(self, dumps=json.dumps, loads=json.loads): 461 | """init: set serializer to use 462 | 463 | :Parameters: 464 | - dumps: json-encoder-function 465 | - loads: json-decoder-function 466 | :Note: The dumps_* functions of this class already directly create 467 | the invariant parts of the resulting json-object themselves, 468 | without using the given json-encoder-function. 469 | """ 470 | self.dumps = dumps 471 | self.loads = loads 472 | 473 | def dumps_request( self, method, params=(), id=0 ): 474 | """serialize JSON-RPC-Request 475 | 476 | :Parameters: 477 | - method: the method-name (str/unicode) 478 | - params: the parameters (list/tuple/dict) 479 | - id: the id (should not be None) 480 | :Returns: | {"jsonrpc": "2.0", "method": "...", "params": ..., "id": ...} 481 | | "jsonrpc", "method", "params" and "id" are always in this order. 
def dumps_notification( self, method, params=() ):
    """Serialize a JSON-RPC 2.0 Notification (a request without "id").

    :Parameters:
        - method: name of the remote method (str/unicode)
        - params: the parameters (list/tuple/dict)
    :Returns:   | {"jsonrpc": "2.0", "method": "...", "params": ...}
                | "jsonrpc", "method" and "params" always appear in this
                | order; "params" is left out when it is empty.
    :Raises:    TypeError if method/params is of wrong type or
                not JSON-serializable
    """
    if not isinstance(method, (str, unicode)):
        raise TypeError('"method" must be a string (or unicode string).')
    if not isinstance(params, (tuple, list, dict)):
        raise TypeError("params must be a tuple/list/dict or None.")

    # guard clause for the short form without "params"
    if not params:
        return '{"jsonrpc": "2.0", "method": %s}' % (self.dumps(method))

    encoded_method = self.dumps(method)
    encoded_params = self.dumps(params)
    return '{"jsonrpc": "2.0", "method": %s, "params": %s}' % \
            (encoded_method, encoded_params)
def loads_request( self, string ):
    """de-serialize a JSON-RPC Request/Notification

    :Returns:   | (method_name, params, id) for a Request or
                | (method_name, params) for a Notification
                | params is a tuple/list or dict (with only str-keys)
                | if id is missing, this is a Notification
    :Raises:    RPCParseError, RPCInvalidRPC, RPCInvalidMethodParams
    """
    try:
        data = self.loads(string)
    except ValueError as err:   # "as" works on Python 2.6+ and Python 3
        raise RPCParseError("No valid JSON. (%s)" % str(err))
    if not isinstance(data, dict):
        raise RPCInvalidRPC("No valid RPC-package.")
    # These two messages used to say "Invalid Response" although a
    # request is being parsed here.
    if "jsonrpc" not in data:
        raise RPCInvalidRPC("""Invalid Request, "jsonrpc" missing.""")
    if not isinstance(data["jsonrpc"], (str, unicode)):
        raise RPCInvalidRPC("""Invalid Request, "jsonrpc" must be a string.""")
    if data["jsonrpc"] != "2.0":
        raise RPCInvalidRPC("""Invalid jsonrpc version.""")
    if "method" not in data:
        raise RPCInvalidRPC("""Invalid Request, "method" is missing.""")
    if not isinstance(data["method"], (str, unicode)):
        raise RPCInvalidRPC("""Invalid Request, "method" must be a string.""")
    if "params" not in data:
        data["params"] = ()
    #convert params-keys from unicode to str
    elif isinstance(data["params"], dict):
        try:
            data["params"] = dictkeyclean(data["params"])
        except UnicodeEncodeError:
            raise RPCInvalidMethodParams("Parameter-names must be in ascii.")
    elif not isinstance(data["params"], (list, tuple)):
        raise RPCInvalidRPC("""Invalid Request, "params" must be an array or object.""")
    # allowed members: jsonrpc/method/params, plus an optional id
    if not( len(data)==3 or ("id" in data and len(data)==4) ):
        raise RPCInvalidRPC("""Invalid Request, additional fields found.""")

    # notification / request
    if "id" not in data:
        return data["method"], data["params"]               #notification
    return data["method"], data["params"], data["id"]       #request
(%s)" % str(err)) 595 | if not isinstance(data, dict): raise RPCInvalidRPC("No valid RPC-package.") 596 | if "jsonrpc" not in data: raise RPCInvalidRPC("""Invalid Response, "jsonrpc" missing.""") 597 | if not isinstance(data["jsonrpc"], (str, unicode)): 598 | raise RPCInvalidRPC("""Invalid Response, "jsonrpc" must be a string.""") 599 | if data["jsonrpc"] != "2.0": raise RPCInvalidRPC("""Invalid jsonrpc version.""") 600 | if "id" not in data: raise RPCInvalidRPC("""Invalid Response, "id" missing.""") 601 | if "result" not in data: data["result"] = None 602 | if "error" not in data: data["error"] = None 603 | if len(data) != 4: raise RPCInvalidRPC("""Invalid Response, additional or missing fields.""") 604 | 605 | #error 606 | if data["error"] is not None: 607 | if data["result"] is not None: 608 | raise RPCInvalidRPC("""Invalid Response, only "result" OR "error" allowed.""") 609 | if not isinstance(data["error"], dict): raise RPCInvalidRPC("Invalid Response, invalid error-object.") 610 | if "code" not in data["error"] or "message" not in data["error"]: 611 | raise RPCInvalidRPC("Invalid Response, invalid error-object.") 612 | if "data" not in data["error"]: data["error"]["data"] = None 613 | if len(data["error"]) != 3: 614 | raise RPCInvalidRPC("Invalid Response, invalid error-object.") 615 | 616 | error_data = data["error"]["data"] 617 | if data["error"]["code"] == PARSE_ERROR: 618 | raise RPCParseError(error_data) 619 | elif data["error"]["code"] == INVALID_REQUEST: 620 | raise RPCInvalidRPC(error_data) 621 | elif data["error"]["code"] == METHOD_NOT_FOUND: 622 | raise RPCMethodNotFound(error_data) 623 | elif data["error"]["code"] == INVALID_METHOD_PARAMS: 624 | raise RPCInvalidMethodParams(error_data) 625 | elif data["error"]["code"] == INTERNAL_ERROR: 626 | raise RPCInternalError(error_data) 627 | elif data["error"]["code"] == PROCEDURE_EXCEPTION: 628 | raise RPCProcedureException(error_data) 629 | elif data["error"]["code"] == AUTHENTIFICATION_ERROR: 630 | raise 
RPCAuthentificationError(error_data) 631 | elif data["error"]["code"] == PERMISSION_DENIED: 632 | raise RPCPermissionDenied(error_data) 633 | elif data["error"]["code"] == INVALID_PARAM_VALUES: 634 | raise RPCInvalidParamValues(error_data) 635 | else: 636 | raise RPCFault(data["error"]["code"], data["error"]["message"], error_data) 637 | #result 638 | else: 639 | return data["result"], data["id"] 640 | 641 | 642 | #========================================= 643 | # transports 644 | 645 | #---------------------- 646 | # transport-logging 647 | 648 | import codecs 649 | import time 650 | 651 | def log_dummy( message ): 652 | """dummy-logger: do nothing""" 653 | pass 654 | def log_stdout( message ): 655 | """print message to STDOUT""" 656 | print message 657 | 658 | def log_file( filename ): 659 | """return a logfunc which logs to a file (in utf-8)""" 660 | def logfile( message ): 661 | f = codecs.open( filename, 'a', encoding='utf-8' ) 662 | f.write( message+"\n" ) 663 | f.close() 664 | return logfile 665 | 666 | def log_filedate( filename ): 667 | """return a logfunc which logs date+message to a file (in utf-8)""" 668 | def logfile( message ): 669 | f = codecs.open( filename, 'a', encoding='utf-8' ) 670 | f.write( time.strftime("%Y-%m-%d %H:%M:%S ")+message+"\n" ) 671 | f.close() 672 | return logfile 673 | 674 | #---------------------- 675 | 676 | class Transport: 677 | """generic Transport-interface. 678 | 679 | This class, and especially its methods and docstrings, 680 | define the Transport-Interface. 681 | """ 682 | def __init__(self): 683 | pass 684 | 685 | def send( self, data ): 686 | """send all data. must be implemented by derived classes.""" 687 | raise NotImplementedError 688 | def recv( self ): 689 | """receive data. 
must be implemented by derived classes.""" 690 | raise NotImplementedError 691 | 692 | def sendrecv( self, string ): 693 | """send + receive data""" 694 | self.send( string ) 695 | return self.recv() 696 | def serve( self, handler, n=None ): 697 | """serve (forever or for n communicaions). 698 | 699 | - receive data 700 | - call result = handler(data) 701 | - send back result if not None 702 | 703 | The serving can be stopped by SIGINT. 704 | 705 | :TODO: 706 | - how to stop? 707 | maybe use a .run-file, and stop server if file removed? 708 | - maybe make n_current accessible? (e.g. for logging) 709 | """ 710 | n_current = 0 711 | while 1: 712 | if n is not None and n_current >= n: 713 | break 714 | data = self.recv() 715 | result = handler(data) 716 | if result is not None: 717 | self.send( result ) 718 | n_current += 1 719 | 720 | 721 | class TransportSTDINOUT(Transport): 722 | """receive from STDIN, send to STDOUT. 723 | 724 | Useful e.g. for debugging. 725 | """ 726 | def send(self, string): 727 | """write data to STDOUT with '***SEND:' prefix """ 728 | print "***SEND:" 729 | print string 730 | def recv(self): 731 | """read data from STDIN""" 732 | print "***RECV (please enter, ^D ends.):" 733 | return sys.stdin.read() 734 | 735 | 736 | import socket, select 737 | class TransportSocket(Transport): 738 | """Transport via socket. 739 | 740 | :SeeAlso: python-module socket 741 | :TODO: 742 | - documentation 743 | - improve this (e.g. make sure that connections are closed, socket-files are deleted etc.) 744 | - exception-handling? 
(socket.error) 745 | """ 746 | def __init__( self, addr, limit=4096, sock_type=socket.AF_INET, sock_prot=socket.SOCK_STREAM, timeout=5.0, logfunc=log_dummy ): 747 | """ 748 | :Parameters: 749 | - addr: socket-address 750 | - timeout: timeout in seconds 751 | - logfunc: function for logging, logfunc(message) 752 | :Raises: socket.timeout after timeout 753 | """ 754 | self.limit = limit 755 | self.addr = addr 756 | self.s_type = sock_type 757 | self.s_prot = sock_prot 758 | self.s = None 759 | self.timeout = timeout 760 | self.log = logfunc 761 | def connect( self ): 762 | self.close() 763 | self.log( "connect to %s" % repr(self.addr) ) 764 | self.s = socket.socket( self.s_type, self.s_prot ) 765 | self.s.settimeout( self.timeout ) 766 | self.s.connect( self.addr ) 767 | def close( self ): 768 | if self.s is not None: 769 | self.log( "close %s" % repr(self.addr) ) 770 | self.s.close() 771 | self.s = None 772 | def __repr__(self): 773 | return "" % repr(self.addr) 774 | 775 | def send( self, string ): 776 | if self.s is None: 777 | self.connect() 778 | self.log( "--> "+repr(string) ) 779 | self.s.sendall( string ) 780 | def recv( self ): 781 | if self.s is None: 782 | self.connect() 783 | data = self.s.recv( self.limit ) 784 | while( select.select((self.s,), (), (), 0.1)[0] ): #TODO: this select is probably not necessary, because server closes this socket 785 | d = self.s.recv( self.limit ) 786 | if len(d) == 0: 787 | break 788 | data += d 789 | self.log( "<-- "+repr(data) ) 790 | return data 791 | 792 | def sendrecv( self, string ): 793 | """send data + receive data + close""" 794 | try: 795 | self.send( string ) 796 | return self.recv() 797 | finally: 798 | self.close() 799 | def serve(self, handler, n=None): 800 | """open socket, wait for incoming connections and handle them. 
801 | 802 | :Parameters: 803 | - n: serve n requests, None=forever 804 | """ 805 | self.close() 806 | self.s = socket.socket( self.s_type, self.s_prot ) 807 | try: 808 | self.log( "listen %s" % repr(self.addr) ) 809 | self.s.bind( self.addr ) 810 | self.s.listen(1) 811 | n_current = 0 812 | while 1: 813 | if n is not None and n_current >= n: 814 | break 815 | conn, addr = self.s.accept() 816 | self.log( "%s connected" % repr(addr) ) 817 | data = conn.recv(self.limit) 818 | self.log( "%s --> %s" % (repr(addr), repr(data)) ) 819 | result = handler(data) 820 | if data is not None: 821 | self.log( "%s <-- %s" % (repr(addr), repr(result)) ) 822 | conn.send( result ) 823 | self.log( "%s close" % repr(addr) ) 824 | conn.close() 825 | n_current += 1 826 | finally: 827 | self.close() 828 | 829 | 830 | if hasattr(socket, 'AF_UNIX'): 831 | 832 | class TransportUnixSocket(TransportSocket): 833 | """Transport via Unix Domain Socket. 834 | """ 835 | def __init__(self, addr=None, limit=4096, timeout=5.0, logfunc=log_dummy): 836 | """ 837 | :Parameters: 838 | - addr: "socket_file" 839 | :Note: | The socket-file is not deleted. 840 | | If the socket-file begins with \x00, abstract sockets are used, 841 | and no socket-file is created. 842 | :SeeAlso: TransportSocket 843 | """ 844 | TransportSocket.__init__( self, addr, limit, socket.AF_UNIX, socket.SOCK_STREAM, timeout, logfunc ) 845 | 846 | class TransportTcpIp(TransportSocket): 847 | """Transport via TCP/IP. 848 | """ 849 | def __init__(self, addr=None, limit=4096, timeout=5.0, logfunc=log_dummy): 850 | """ 851 | :Parameters: 852 | - addr: ("host",port) 853 | :SeeAlso: TransportSocket 854 | """ 855 | TransportSocket.__init__( self, addr, limit, socket.AF_INET, socket.SOCK_STREAM, timeout, logfunc ) 856 | 857 | 858 | #========================================= 859 | # client side: server proxy 860 | 861 | class ServerProxy: 862 | """RPC-client: server proxy 863 | 864 | A logical connection to a RPC server. 
865 | 866 | It works with different data/serializers and different transports. 867 | 868 | Notifications and id-handling/multicall are not yet implemented. 869 | 870 | :Example: 871 | see module-docstring 872 | 873 | :TODO: verbose/logging? 874 | """ 875 | def __init__( self, data_serializer, transport ): 876 | """ 877 | :Parameters: 878 | - data_serializer: a data_structure+serializer-instance 879 | - transport: a Transport instance 880 | """ 881 | #TODO: check parameters 882 | self.__data_serializer = data_serializer 883 | if not isinstance(transport, Transport): 884 | raise ValueError('invalid "transport" (must be a Transport-instance)"') 885 | self.__transport = transport 886 | 887 | def __str__(self): 888 | return repr(self) 889 | def __repr__(self): 890 | return "" % (self.__transport, self.__data_serializer) 891 | 892 | def __req( self, methodname, args=None, kwargs=None, id=0 ): 893 | # JSON-RPC 1.0: only positional parameters 894 | if len(kwargs) > 0 and isinstance(self.data_serializer, JsonRpc10): 895 | raise ValueError("Only positional parameters allowed in JSON-RPC 1.0") 896 | # JSON-RPC 2.0: only args OR kwargs allowed! 
897 | if len(args) > 0 and len(kwargs) > 0: 898 | raise ValueError("Only positional or named parameters are allowed!") 899 | if len(kwargs) == 0: 900 | req_str = self.__data_serializer.dumps_request( methodname, args, id ) 901 | else: 902 | req_str = self.__data_serializer.dumps_request( methodname, kwargs, id ) 903 | try: 904 | resp_str = self.__transport.sendrecv( req_str ) 905 | except Exception,err: 906 | raise RPCTransportError(err) 907 | resp = self.__data_serializer.loads_response( resp_str ) 908 | return resp[0] 909 | 910 | def __getattr__(self, name): 911 | # magic method dispatcher 912 | # note: to call a remote object with an non-standard name, use 913 | # result getattr(my_server_proxy, "strange-python-name")(args) 914 | return _method(self.__req, name) 915 | 916 | # request dispatcher 917 | class _method: 918 | """some "magic" to bind an RPC method to an RPC server. 919 | 920 | Supports "nested" methods (e.g. examples.getStateName). 921 | 922 | :Raises: AttributeError for method-names/attributes beginning with '_'. 923 | """ 924 | def __init__(self, req, name): 925 | if name[0] == "_": #prevent rpc-calls for proxy._*-functions 926 | raise AttributeError("invalid attribute '%s'" % name) 927 | self.__req = req 928 | self.__name = name 929 | def __getattr__(self, name): 930 | if name[0] == "_": #prevent rpc-calls for proxy._*-functions 931 | raise AttributeError("invalid attribute '%s'" % name) 932 | return _method(self.__req, "%s.%s" % (self.__name, name)) 933 | def __call__(self, *args, **kwargs): 934 | return self.__req(self.__name, args, kwargs) 935 | 936 | #========================================= 937 | # server side: Server 938 | 939 | class Server: 940 | """RPC-server. 941 | 942 | It works with different data/serializers and 943 | with different transports. 944 | 945 | :Example: 946 | see module-docstring 947 | 948 | :TODO: 949 | - mixed JSON-RPC 1.0/2.0 server? 950 | - logging/loglevels? 
951 | """ 952 | def __init__( self, data_serializer, transport, logfile=None ): 953 | """ 954 | :Parameters: 955 | - data_serializer: a data_structure+serializer-instance 956 | - transport: a Transport instance 957 | - logfile: file to log ("unexpected") errors to 958 | """ 959 | #TODO: check parameters 960 | self.__data_serializer = data_serializer 961 | if not isinstance(transport, Transport): 962 | raise ValueError('invalid "transport" (must be a Transport-instance)"') 963 | self.__transport = transport 964 | self.logfile = logfile 965 | if self.logfile is not None: #create logfile (or raise exception) 966 | f = codecs.open( self.logfile, 'a', encoding='utf-8' ) 967 | f.close() 968 | 969 | self.funcs = {} 970 | 971 | def __repr__(self): 972 | return "" % (self.__transport, self.__data_serializer) 973 | 974 | def log(self, message): 975 | """write a message to the logfile (in utf-8)""" 976 | if self.logfile is not None: 977 | f = codecs.open( self.logfile, 'a', encoding='utf-8' ) 978 | f.write( time.strftime("%Y-%m-%d %H:%M:%S ")+message+"\n" ) 979 | f.close() 980 | 981 | def register_instance(self, myinst, name=None): 982 | """Add all functions of a class-instance to the RPC-services. 983 | 984 | All entries of the instance which do not begin with '_' are added. 985 | 986 | :Parameters: 987 | - myinst: class-instance containing the functions 988 | - name: | hierarchical prefix. 989 | | If omitted, the functions are added directly. 990 | | If given, the functions are added as "name.function". 991 | :TODO: 992 | - only add functions and omit attributes? 993 | - improve hierarchy? 994 | """ 995 | for e in dir(myinst): 996 | if e[0][0] != "_": 997 | if name is None: 998 | self.register_function( getattr(myinst, e) ) 999 | else: 1000 | self.register_function( getattr(myinst, e), name="%s.%s" % (name, e) ) 1001 | def register_function(self, function, name=None): 1002 | """Add a function to the RPC-services. 
1003 | 1004 | :Parameters: 1005 | - function: function to add 1006 | - name: RPC-name for the function. If omitted/None, the original 1007 | name of the function is used. 1008 | """ 1009 | if name is None: 1010 | self.funcs[function.__name__] = function 1011 | else: 1012 | self.funcs[name] = function 1013 | 1014 | def handle(self, rpcstr): 1015 | """Handle a RPC-Request. 1016 | 1017 | :Parameters: 1018 | - rpcstr: the received rpc-string 1019 | :Returns: the data to send back or None if nothing should be sent back 1020 | :Raises: RPCFault (and maybe others) 1021 | """ 1022 | #TODO: id 1023 | notification = False 1024 | try: 1025 | req = self.__data_serializer.loads_request( rpcstr ) 1026 | if len(req) == 2: #notification 1027 | method, params = req 1028 | notification = True 1029 | else: #request 1030 | method, params, id = req 1031 | except RPCFault, err: 1032 | return self.__data_serializer.dumps_error( err, id=None ) 1033 | except Exception, err: 1034 | self.log( "%d (%s): %s" % (INTERNAL_ERROR, ERROR_MESSAGE[INTERNAL_ERROR], str(err)) ) 1035 | return self.__data_serializer.dumps_error( RPCFault(INTERNAL_ERROR, ERROR_MESSAGE[INTERNAL_ERROR]), id=None ) 1036 | 1037 | if method not in self.funcs: 1038 | if notification: 1039 | return None 1040 | return self.__data_serializer.dumps_error( RPCFault(METHOD_NOT_FOUND, ERROR_MESSAGE[METHOD_NOT_FOUND]), id ) 1041 | 1042 | try: 1043 | if isinstance(params, dict): 1044 | result = self.funcs[method]( **params ) 1045 | else: 1046 | result = self.funcs[method]( *params ) 1047 | except RPCFault, err: 1048 | if notification: 1049 | return None 1050 | return self.__data_serializer.dumps_error( err, id=None ) 1051 | except Exception, err: 1052 | if notification: 1053 | return None 1054 | self.log( "%d (%s): %s" % (INTERNAL_ERROR, ERROR_MESSAGE[INTERNAL_ERROR], str(err)) ) 1055 | return self.__data_serializer.dumps_error( RPCFault(INTERNAL_ERROR, ERROR_MESSAGE[INTERNAL_ERROR]), id ) 1056 | 1057 | if notification: 1058 | return 
None 1059 | try: 1060 | return self.__data_serializer.dumps_response( result, id ) 1061 | except Exception, err: 1062 | self.log( "%d (%s): %s" % (INTERNAL_ERROR, ERROR_MESSAGE[INTERNAL_ERROR], str(err)) ) 1063 | return self.__data_serializer.dumps_error( RPCFault(INTERNAL_ERROR, ERROR_MESSAGE[INTERNAL_ERROR]), id ) 1064 | 1065 | def serve(self, n=None): 1066 | """serve (forever or for n communicaions). 1067 | 1068 | :See: Transport 1069 | """ 1070 | self.__transport.serve( self.handle, n ) 1071 | 1072 | #========================================= 1073 | 1074 | -------------------------------------------------------------------------------- /progressbar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: iso-8859-1 -*- 3 | # 4 | # progressbar - Text progressbar library for python. 5 | # Copyright (c) 2005 Nilton Volpato 6 | # 7 | # This library is free software; you can redistribute it and/or 8 | # modify it under the terms of the GNU Lesser General Public 9 | # License as published by the Free Software Foundation; either 10 | # version 2.1 of the License, or (at your option) any later version. 11 | # 12 | # This library is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | # Lesser General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Lesser General Public 18 | # License along with this library; if not, write to the Free Software 19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 | 21 | 22 | """Text progressbar library for python. 23 | 24 | This library provides a text mode progressbar. This is typically used 25 | to display the progress of a long running operation, providing a 26 | visual clue that processing is underway. 
27 | 28 | The ProgressBar class manages the progress, and the format of the line 29 | is given by a number of widgets. A widget is an object that may 30 | display diferently depending on the state of the progress. There are 31 | three types of widget: 32 | - a string, which always shows itself; 33 | - a ProgressBarWidget, which may return a diferent value every time 34 | it's update method is called; and 35 | - a ProgressBarWidgetHFill, which is like ProgressBarWidget, except it 36 | expands to fill the remaining width of the line. 37 | 38 | The progressbar module is very easy to use, yet very powerful. And 39 | automatically supports features like auto-resizing when available. 40 | """ 41 | 42 | __author__ = "Nilton Volpato" 43 | __author_email__ = "first-name dot last-name @ gmail.com" 44 | __date__ = "2006-05-07" 45 | __version__ = "2.2" 46 | 47 | # Changelog 48 | # 49 | # 2006-05-07: v2.2 fixed bug in windows 50 | # 2005-12-04: v2.1 autodetect terminal width, added start method 51 | # 2005-12-04: v2.0 everything is now a widget (wow!) 52 | # 2005-12-03: v1.0 rewrite using widgets 53 | # 2005-06-02: v0.5 rewrite 54 | # 2004-??-??: v0.1 first version 55 | 56 | import sys 57 | import time 58 | from array import array 59 | try: 60 | from fcntl import ioctl 61 | import termios 62 | except ImportError: 63 | pass 64 | import signal 65 | 66 | 67 | class ProgressBarWidget(object): 68 | """This is an element of ProgressBar formatting. 69 | 70 | The ProgressBar object will call it's update value when an update 71 | is needed. It's size may change between call, but the results will 72 | not be good if the size changes drastically and repeatedly. 73 | """ 74 | def update(self, pbar): 75 | """Returns the string representing the widget. 76 | 77 | The parameter pbar is a reference to the calling ProgressBar, 78 | where one can access attributes of the class for knowing how 79 | the update must be made. 
80 | 81 | At least this function must be overriden.""" 82 | pass 83 | 84 | 85 | class ProgressBarWidgetHFill(object): 86 | """This is a variable width element of ProgressBar formatting. 87 | 88 | The ProgressBar object will call it's update value, informing the 89 | width this object must the made. This is like TeX \\hfill, it will 90 | expand to fill the line. You can use more than one in the same 91 | line, and they will all have the same width, and together will 92 | fill the line. 93 | """ 94 | def update(self, pbar, width): 95 | """Returns the string representing the widget. 96 | 97 | The parameter pbar is a reference to the calling ProgressBar, 98 | where one can access attributes of the class for knowing how 99 | the update must be made. The parameter width is the total 100 | horizontal width the widget must have. 101 | 102 | At least this function must be overriden.""" 103 | pass 104 | 105 | 106 | class ETA(ProgressBarWidget): 107 | "Widget for the Estimated Time of Arrival" 108 | def format_time(self, seconds): 109 | return time.strftime('%H:%M:%S', time.gmtime(seconds)) 110 | 111 | def update(self, pbar): 112 | if pbar.currval == 0: 113 | return 'ETA: --:--:--' 114 | elif pbar.finished: 115 | return 'Time: %s' % self.format_time(pbar.seconds_elapsed) 116 | else: 117 | elapsed = pbar.seconds_elapsed 118 | eta = elapsed * pbar.maxval / pbar.currval - elapsed 119 | return 'ETA: %s' % self.format_time(eta) 120 | 121 | 122 | class FileTransferSpeed(ProgressBarWidget): 123 | "Widget for showing the transfer speed (useful for file transfers)." 
124 | def __init__(self): 125 | self.fmt = '%6.2f %s' 126 | self.units = ['B', 'K', 'M', 'G', 'T', 'P'] 127 | 128 | def update(self, pbar): 129 | if pbar.seconds_elapsed < 2e-6: # == 0: 130 | bps = 0.0 131 | else: 132 | bps = float(pbar.currval) / pbar.seconds_elapsed 133 | spd = bps 134 | for u in self.units: 135 | if spd < 1000: 136 | break 137 | spd /= 1000 138 | return self.fmt % (spd, u + '/s') 139 | 140 | 141 | class RotatingMarker(ProgressBarWidget): 142 | "A rotating marker for filling the bar of progress." 143 | def __init__(self, markers='|/-\\'): 144 | self.markers = markers 145 | self.curmark = -1 146 | 147 | def update(self, pbar): 148 | if pbar.finished: 149 | return self.markers[0] 150 | self.curmark = (self.curmark + 1) % len(self.markers) 151 | return self.markers[self.curmark] 152 | 153 | 154 | class Percentage(ProgressBarWidget): 155 | "Just the percentage done." 156 | def update(self, pbar): 157 | return '%3d%%' % pbar.percentage() 158 | 159 | 160 | class Fraction(ProgressBarWidget): 161 | "Just the fraction done." 162 | def update(self, pbar): 163 | return "%d/%d" % (pbar.currval, pbar.maxval) 164 | 165 | 166 | class Bar(ProgressBarWidgetHFill): 167 | "The bar of progress. It will strech to fill the line." 168 | def __init__(self, marker='#', left='|', right='|'): 169 | self.marker = marker 170 | self.left = left 171 | self.right = right 172 | 173 | def _format_marker(self, pbar): 174 | if isinstance(self.marker, (str, unicode)): 175 | return self.marker 176 | else: 177 | return self.marker.update(pbar) 178 | 179 | def update(self, pbar, width): 180 | percent = pbar.percentage() 181 | cwidth = width - len(self.left) - len(self.right) 182 | marked_width = int(percent * cwidth / 100) 183 | m = self._format_marker(pbar) 184 | bar = (self.left + (m * marked_width).ljust(cwidth) + self.right) 185 | return bar 186 | 187 | 188 | class ReverseBar(Bar): 189 | "The reverse bar of progress, or bar of regress. 
:)" 190 | def update(self, pbar, width): 191 | percent = pbar.percentage() 192 | cwidth = width - len(self.left) - len(self.right) 193 | marked_width = int(percent * cwidth / 100) 194 | m = self._format_marker(pbar) 195 | bar = (self.left + (m * marked_width).rjust(cwidth) + self.right) 196 | return bar 197 | 198 | default_widgets = [Percentage(), ' ', Bar()] 199 | 200 | 201 | class ProgressBar(object): 202 | """This is the ProgressBar class, it updates and prints the bar. 203 | 204 | The term_width parameter may be an integer. Or None, in which case 205 | it will try to guess it, if it fails it will default to 80 columns. 206 | 207 | The simple use is like this: 208 | >>> pbar = ProgressBar().start() 209 | >>> for i in xrange(100): 210 | ... # do something 211 | ... pbar.update(i+1) 212 | ... 213 | >>> pbar.finish() 214 | 215 | But anything you want to do is possible (well, almost anything). 216 | You can supply different widgets of any type in any order. And you 217 | can even write your own widgets! There are many widgets already 218 | shipped and you should experiment with them. 219 | 220 | When implementing a widget update method you may access any 221 | attribute or function of the ProgressBar object calling the 222 | widget's update method. 
The most important attributes you would 223 | like to access are: 224 | - currval: current value of the progress, 0 <= currval <= maxval 225 | - maxval: maximum (and final) value of the progress 226 | - finished: True if the bar is have finished (reached 100%), False o/w 227 | - start_time: first time update() method of ProgressBar was called 228 | - seconds_elapsed: seconds elapsed since start_time 229 | - percentage(): percentage of the progress (this is a method) 230 | """ 231 | def __init__(self, maxval=100, widgets=default_widgets, term_width=None, 232 | fd=sys.stderr, force_update=False): 233 | assert maxval > 0 234 | self.maxval = maxval 235 | self.widgets = widgets 236 | self.fd = fd 237 | self.signal_set = False 238 | if term_width is None: 239 | try: 240 | self.handle_resize(None, None) 241 | signal.signal(signal.SIGWINCH, self.handle_resize) 242 | self.signal_set = True 243 | except: 244 | self.term_width = 79 245 | else: 246 | self.term_width = term_width 247 | 248 | self.currval = 0 249 | self.finished = False 250 | self.prev_percentage = -1 251 | self.start_time = None 252 | self.seconds_elapsed = 0 253 | self.force_update = force_update 254 | 255 | def handle_resize(self, signum, frame): 256 | h, w = array('h', ioctl(self.fd, termios.TIOCGWINSZ, '\0' * 8))[:2] 257 | self.term_width = w 258 | 259 | def percentage(self): 260 | "Returns the percentage of the progress." 
261 | return self.currval * 100.0 / self.maxval 262 | 263 | def _format_widgets(self): 264 | r = [] 265 | hfill_inds = [] 266 | num_hfill = 0 267 | currwidth = 0 268 | for i, w in enumerate(self.widgets): 269 | if isinstance(w, ProgressBarWidgetHFill): 270 | r.append(w) 271 | hfill_inds.append(i) 272 | num_hfill += 1 273 | elif isinstance(w, (str, unicode)): 274 | r.append(w) 275 | currwidth += len(w) 276 | else: 277 | weval = w.update(self) 278 | currwidth += len(weval) 279 | r.append(weval) 280 | for iw in hfill_inds: 281 | r[iw] = r[iw].update(self, 282 | (self.term_width - currwidth) / num_hfill) 283 | return r 284 | 285 | def _format_line(self): 286 | return ''.join(self._format_widgets()).ljust(self.term_width) 287 | 288 | def _need_update(self): 289 | if self.force_update: 290 | return True 291 | return int(self.percentage()) != int(self.prev_percentage) 292 | 293 | def reset(self): 294 | if not self.finished and self.start_time: 295 | self.finish() 296 | self.finished = False 297 | self.currval = 0 298 | self.start_time = None 299 | self.seconds_elapsed = None 300 | self.prev_percentage = None 301 | return self 302 | 303 | def update(self, value): 304 | "Updates the progress bar to a new value." 305 | assert 0 <= value <= self.maxval 306 | self.currval = value 307 | if not self._need_update() or self.finished: 308 | return 309 | if not self.start_time: 310 | self.start_time = time.time() 311 | self.seconds_elapsed = time.time() - self.start_time 312 | self.prev_percentage = self.percentage() 313 | if value != self.maxval: 314 | self.fd.write(self._format_line() + '\r') 315 | else: 316 | self.finished = True 317 | self.fd.write(self._format_line() + '\n') 318 | 319 | def start(self): 320 | """Start measuring time, and prints the bar at 0%. 321 | 322 | It returns self so you can use it like this: 323 | >>> pbar = ProgressBar().start() 324 | >>> for i in xrange(100): 325 | ... # do something 326 | ... pbar.update(i+1) 327 | ... 
328 | >>> pbar.finish() 329 | """ 330 | self.update(0) 331 | return self 332 | 333 | def finish(self): 334 | """Used to tell the progress is finished.""" 335 | self.update(self.maxval) 336 | if self.signal_set: 337 | signal.signal(signal.SIGWINCH, signal.SIG_DFL) 338 | 339 | 340 | def example1(): 341 | widgets = ['Test: ', Percentage(), ' ', Bar(marker=RotatingMarker()), 342 | ' ', ETA(), ' ', FileTransferSpeed()] 343 | pbar = ProgressBar(widgets=widgets, maxval=10000000).start() 344 | for i in range(1000000): 345 | # do something 346 | pbar.update(10 * i + 1) 347 | pbar.finish() 348 | return pbar 349 | 350 | 351 | def example2(): 352 | class CrazyFileTransferSpeed(FileTransferSpeed): 353 | "It's bigger between 45 and 80 percent" 354 | def update(self, pbar): 355 | if 45 < pbar.percentage() < 80: 356 | return 'Bigger Now ' + FileTransferSpeed.update(self, pbar) 357 | else: 358 | return FileTransferSpeed.update(self, pbar) 359 | 360 | widgets = [CrazyFileTransferSpeed(), ' <<<', 361 | Bar(), '>>> ', Percentage(), ' ', ETA()] 362 | pbar = ProgressBar(widgets=widgets, maxval=10000000) 363 | # maybe do something 364 | pbar.start() 365 | for i in range(2000000): 366 | # do something 367 | pbar.update(5 * i + 1) 368 | pbar.finish() 369 | return pbar 370 | 371 | 372 | def example3(): 373 | widgets = [Bar('>'), ' ', ETA(), ' ', ReverseBar('<')] 374 | pbar = ProgressBar(widgets=widgets, maxval=10000000).start() 375 | for i in range(1000000): 376 | # do something 377 | pbar.update(10 * i + 1) 378 | pbar.finish() 379 | return pbar 380 | 381 | 382 | def example4(): 383 | widgets = ['Test: ', Percentage(), ' ', 384 | Bar(marker='0', left='[', right=']'), 385 | ' ', ETA(), ' ', FileTransferSpeed()] 386 | pbar = ProgressBar(widgets=widgets, maxval=500) 387 | pbar.start() 388 | for i in range(100, 500 + 1, 50): 389 | time.sleep(0.2) 390 | pbar.update(i) 391 | pbar.finish() 392 | return pbar 393 | 394 | 395 | def example5(): 396 | widgets = ['Test: ', Fraction(), ' ', 
Bar(marker=RotatingMarker()), 397 | ' ', ETA(), ' ', FileTransferSpeed()] 398 | pbar = ProgressBar(widgets=widgets, maxval=10, force_update=True).start() 399 | for i in range(1, 11): 400 | # do something 401 | time.sleep(0.5) 402 | pbar.update(i) 403 | pbar.finish() 404 | return pbar 405 | 406 | 407 | def main(): 408 | example1() 409 | print 410 | example2() 411 | print 412 | example3() 413 | print 414 | example4() 415 | print 416 | example5() 417 | print 418 | 419 | if __name__ == '__main__': 420 | main() 421 | --------------------------------------------------------------------------------