├── COPYING ├── INSTALL.md ├── README.md ├── andoc.py ├── concept-diagram.png ├── data ├── 1.txt ├── 2.txt ├── 3.txt ├── 4.txt ├── 5.txt ├── 6.txt ├── 7.txt ├── doc-struc.png ├── event-timeline.jpg └── event-tree.json ├── doc.py ├── featext.py ├── graph2redis.py ├── import.py ├── import_maildir.py ├── rediskeys.py ├── selection.py ├── static ├── andoc.css ├── andoc.js ├── back.png ├── d3.layout.min.js ├── d3.min.js ├── jquery-1.5.min.js ├── json2.js └── tg.html ├── templates ├── base.html ├── date.html ├── date │ └── list.html ├── default.html ├── doc.html ├── doc │ ├── list.html │ ├── raw.html │ ├── struc.html │ └── view.html ├── event.html ├── event │ └── list.html ├── person.html ├── person │ ├── graph.html │ └── list.html ├── place.html ├── place │ └── list.html └── search │ └── result.html └── triple.py /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Install and basic usage 2 | 3 | $ easy_install lxml simplejson cherrypy jinja2 redis 4 | 5 | # Import 6 | 7 | Please make sure a redis-server is running. 8 | 9 | import example plain-text files from the data/ folder. 10 | 11 | $ python import.py data '.*txt$' 12 | 13 | # Start andoc 14 | 15 | $ python andoc.py 16 | $ open 'http://localhost:8080' 17 | 18 | Please use a **recent** version of Chrome or Firefox. 19 | 20 | # Graph generation 21 | 22 | install igraph and the python igraph extension from: 23 | http://igraph.sourceforge.net/download.html 24 | 25 | extract the graph related features from redis and create data/person.gml 26 | 27 | $ python featext.py 28 | 29 | render the graph layout and save the x,y positions into redis 30 | 31 | $ python graph2redis.py 32 | $ open 'http://localhost:8080/person/graph' 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # andoc # 2 | 3 | A collaborative web tool to enrich content. 4 | 5 | ## Install and usage ## 6 | 7 | See INSTALL.md 8 | 9 | ## Idea ## 10 | 11 | The idea of andoc is the enrichment and analysis of a wide range of content. 12 | 13 | Like wikipdia or etherpad/gobby, it is a collaborative tool where many users 14 | can work on the same content at the same time. However, andoc is not about 15 | creating content but aims to enrich existing data with a specific set of metadata. 16 | 17 | In the second step, andoc is analyzing the collected metadata and provides 18 | the user with dynamic visualisations to access and navigate the content. 19 | 20 | This is especially helpful with larger sets of data. 21 | 22 | ## The Model ## 23 | 24 | The main metadata in andoc is the concept of an "event". An event usually 25 | consists of a place and a time and agents (or persons) present at the 26 | event. 27 | 28 | Therefore one aspect of andoc is to identify these elements in the existing 29 | data. 30 | 31 | ## Example ## 32 | 33 | Let's take a snippet from a mail conversation: 34 | 35 | We left Warren at Dean Gate, in our way home last night, and he is 36 | now on his road to town. He left his love, &c., to you, and I will 37 | deliver it when we meet. Henry goes to Harden to-day in his way to 38 | his Master's degree. We shall feel the loss of these two most 39 | agreeable young men exceedingly, and shall have nothing to console 40 | us till the arrival of the Coopers on Tuesday. As they will stay 41 | here till the Monday following, perhaps Caroline will go to the 42 | Ashe ball with me, though I dare say she will not. 43 | 44 | and enrich the content: 45 | 46 | (p) is a person, (d) a date, (l) a location and (e) an event. 47 | 48 | We left at , in our way home , and he 49 | is now on his road to . He left his love, &c., to you, and I will 50 | deliver it when we meet. goes to in his way 51 | to his Master's degree. We shall feel the loss of these two most agreeable 52 | young men exceedingly, and shall have nothing to console us till the arrival of 53 | the on . As they will stay here till the 54 | following, perhaps will go to the with me, though 55 | I dare say she will not. 56 | 57 | Here is the example in andoc: 58 | 59 | ![doc struc](https://github.com/endpnt/andoc/raw/master/data/doc-struc.png "doc struc") 60 | 61 | Andoc would then know about the existence of: 62 | 63 | Agents: 64 | 65 | * Warren 66 | * Henry 67 | * Coopers 68 | * Caroline 69 | 70 | Places: 71 | 72 | * Dean Gate 73 | * Town 74 | * Harden 75 | 76 | Date: 77 | 78 | * last night 79 | * to-day 80 | * Monday 81 | * Tuesday 82 | 83 | Event: 84 | 85 | * Ashe ball 86 | 87 | In the actual interface the user should be provided with additional tools, 88 | so that "Monday" or "to-day" in the context of this document would actually 89 | represent a real date. 90 | 91 | Since some of the steps can be done with the help of natural language 92 | processing, andoc aims to provide automatic processing of the data as well. 93 | 94 | ## Analysis ## 95 | 96 | The afford to enrich the documents, should lead to a direct improvement 97 | for the users: 98 | 99 | It will for example enable the user to see the events with associated relations 100 | on a timeline: 101 | ![event timeline](https://github.com/endpnt/andoc/raw/master/data/event-timeline.jpg "enriched event timeline preview") 102 | 103 | * Provide additional information about person, places and events 104 | from sources like wikipedia along the data. Context matters. 105 | 106 | * Visualisation of semantic relations, social networks, related data 107 | and events. 108 | 109 | * Grouping of related data based on event, place or person. 110 | 111 | * Timeline of events and data. 112 | 113 | * Geographical presentation (map) of events and data. 114 | 115 | All those presentations should be updated constantly as the enrichment process 116 | progresses. 117 | 118 | ## Questions? Ideas? ## 119 | 120 | Contact me on twitter @endpnt 121 | 122 | 123 | ## Copyleft ## 124 | 125 | GPLv3 see COPYING 126 | -------------------------------------------------------------------------------- /andoc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | # 16 | 17 | import cherrypy, simplejson, re, string 18 | import redis 19 | from os import path 20 | from sys import exit 21 | from lxml import html as lxhtml 22 | from lxml.html import builder as b 23 | from urlparse import urlsplit 24 | from itertools import izip_longest 25 | 26 | from doc import * 27 | from selection import * 28 | from triple import * 29 | from jinja2 import Template, Environment, FileSystemLoader 30 | # key pattern for redis 31 | from rediskeys import * 32 | 33 | CURDIR = path.dirname(path.abspath(__file__)) 34 | STATICDIR = CURDIR + "/static/" 35 | TEMPLATES_DIR = CURDIR + "/templates/" 36 | 37 | 38 | # cherrypy json helper 39 | def jsonify_tool_callback(*args, **kwargs): 40 | response = cherrypy.response 41 | response.headers['Content-Type'] = 'application/json' 42 | response.body = simplejson.JSONEncoder().iterencode(response.body) 43 | 44 | cherrypy.tools.jsonify = cherrypy.Tool('before_finalize', 45 | jsonify_tool_callback, priority=30) 46 | 47 | class Andoc(object): 48 | 49 | def __init__(self, redis): 50 | self._env = Environment(loader=FileSystemLoader(TEMPLATES_DIR)) 51 | self._redis = redis 52 | self._txt_selections = TextSelections(self._redis) 53 | self._html_selections = HtmlSelections(self._redis) 54 | self._triples = Triples(self._redis) 55 | self._docs = Documents(self._redis) 56 | 57 | def default(self): 58 | default = self._env.get_template('default.html') 59 | return default.render(title='Andoc Default') 60 | 61 | default.exposed = True 62 | 63 | def search(self,query): 64 | search_tmpl = self._env.get_template('search/result.html') 65 | return search_tmpl.render(title='Andoc Search', result='TODO') 66 | 67 | search.exposed = True 68 | 69 | def event(self, action, id=None): 70 | if action == 'list': 71 | event_list_tmpl = self._env.get_template('event/list.html') 72 | return event_list_tmpl.render(title='Events') 73 | 74 | return "" 75 | 76 | event.exposed = True 77 | 78 | def person(self, action): 79 | if action == 'list': 80 | person_list_tmpl = self._env.get_template('person/list.html') 81 | persons = [] 82 | for t in self._triples.from_predicate('person'): 83 | persons.append({'uri':t.subject, 'name':t.object}) 84 | 85 | return person_list_tmpl.render( 86 | title = 'Persons', 87 | persons = sorted(persons)) 88 | 89 | elif action == 'graph': 90 | person_graph_tmpl = self._env.get_template('person/graph.html') 91 | gpipe = self._redis.pipeline() 92 | gpipe.sort( 93 | LAYOUT_EDGES, 94 | by='nosort', 95 | get=[ LAYOUT_EDGE_POS_X1 % '*', 96 | LAYOUT_EDGE_POS_X2 % '*', 97 | LAYOUT_EDGE_POS_Y1 % '*', 98 | LAYOUT_EDGE_POS_Y2 % '*']) 99 | gpipe.sort( 100 | LAYOUT_VERTICES, 101 | by='nosort', 102 | get=[ LAYOUT_VERTICE_POS_X % '*', 103 | LAYOUT_VERTICE_POS_Y % '*', 104 | LAYOUT_VERTICE_OBJ_ID % '*', 105 | LAYOUT_VERTICE_LABEL % '*']) 106 | tmp_edges, tmp_vertices = gpipe.execute() 107 | 108 | # data comes as one long list from redis, 109 | # group both by 4 and create a dict 110 | edges_by4 = [ a for a in [iter(tmp_edges)] * 4 ] 111 | edges = [ dict({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2 }) \ 112 | for x1,x2,y1,y2 in izip_longest(*edges_by4) ] 113 | 114 | vertices_by4 = [ a for a in [iter(tmp_vertices)] * 4 ] 115 | vertices = [ dict({'x': x, 'y': y, 116 | 'obj_id': int(float(obj_id)), 117 | 'label': label }) \ 118 | for x,y,obj_id,label in izip_longest(*vertices_by4) ] 119 | 120 | width = self._redis.get(LAYOUT_WIDTH) 121 | height = self._redis.get(LAYOUT_HEIGHT) 122 | if width and height: 123 | w = '%d' % (int(width) + 100) 124 | h = '%d' % (int(height) + 50) 125 | 126 | xw = '%.4f' % ((int(width) + 100) / 2.0) 127 | xh = '%.4f' % ((int(height) + 50) / 2.0) 128 | else: 129 | w,h,xw,xh = 0,0,0.0,0.0 130 | 131 | return person_graph_tmpl.render( 132 | edges = edges, 133 | vertices = vertices, 134 | width = w, 135 | height = h, 136 | xwidth = xw, 137 | xheight = xh, 138 | title = 'Persons') 139 | else: 140 | return "" 141 | person.exposed = True 142 | 143 | def place(self, action): 144 | if action == 'list': 145 | place_list_tmpl = self._env.get_template('place/list.html') 146 | html = [] 147 | places = [] 148 | for t in self._triples.from_predicate('place'): 149 | places.append({'uri': t.subject, 'name': t.object}) 150 | 151 | return place_list_tmpl.render( 152 | title = 'Places', 153 | places = sorted(places)) 154 | else: 155 | return "" 156 | place.exposed = True 157 | 158 | def date(self, action): 159 | if action == 'list': 160 | date_list_tmpl = self._env.get_template('date/list.html') 161 | html = [] 162 | dates = [] 163 | for t in self._triples.from_predicate('date'): 164 | dates.append({'uri': t.subject, 'name': t.object}) 165 | 166 | return date_list_tmpl.render( 167 | title = 'Dates', 168 | dates = sorted(dates)) 169 | else: 170 | return "" 171 | date.exposed = True 172 | 173 | 174 | def doc(self, action='', id=None): 175 | if action == 'list' and id is None: 176 | list_tmpl = self._env.get_template('doc/list.html') 177 | return list_tmpl.render( 178 | title='Document List', 179 | documents = self._docs.get_list()) 180 | 181 | d = Document(self._redis,id=id) 182 | if not d.content: 183 | raise cherrypy.HTTPError(404, "No such document") 184 | 185 | if action == 'raw': 186 | raw_tmpl = self._env.get_template('doc/raw.html') 187 | return raw_tmpl.render(title='Raw Document', doc = d) 188 | 189 | elif action == 'struc': 190 | struc_tmpl = self._env.get_template('doc/struc.html') 191 | elements = self._render(d.id) 192 | # TODO migrate this to jinja or keep lxml? 193 | if elements: 194 | html = [] 195 | for start, end, sel in elements: 196 | if len(d.content[start:end].strip()) > 0: 197 | cclass = b.CLASS('s' + str(start) + 'e' + str(end)) 198 | if sel is not None and sel.docid == d.id: 199 | node = b.E(NAMESPACE[sel.ref], cclass) 200 | node.text = d.content[start:end] 201 | html.append(node) 202 | else: 203 | html.append(b.PRE(d.content[start:end], cclass)) 204 | 205 | return struc_tmpl.render( 206 | title = "Document Semantic", 207 | doc = d, 208 | struc = lxhtml.tostring(b.DIV(*html))) 209 | else: 210 | return struc_tmpl.render( 211 | title = "Document Semantic", 212 | doc = d, 213 | struc = 'Nothing here jet') 214 | 215 | elif action == 'view': 216 | view_tmpl = self._env.get_template('doc/view.html') 217 | elements = self._render(d.id) 218 | if elements: 219 | content_html = [] 220 | meta_html = [] 221 | metalist = {} 222 | for start, end, sel in elements: 223 | if len(d.content[start:end]) > 0: 224 | if sel is not None and sel.docid == d.id: 225 | node = b.E(NAMESPACE[sel.ref]) 226 | node.text = d.content[start:end].strip() 227 | content_html.append(node) 228 | else: 229 | content_html.append(b.PRE(d.content[start:end].strip())) 230 | 231 | for pre in ('person','place','date','event'): 232 | metalist[pre] = set() 233 | 234 | #for sel, sub, pre, obj, start, end in self._triples: 235 | # if sel is not None and sel.docid == d.id: 236 | # metalist[pre].add(obj) 237 | 238 | for pre in ('person','place','date','event'): 239 | metali = [] 240 | for m in sorted(metalist[pre]): 241 | metali.append(b.LI(m)) 242 | meta_html.append(b.H3(pre)) 243 | meta_html.append(b.UL(*metali)) 244 | 245 | content = lxhtml.tostring(b.DIV(*content_html)) 246 | meta = lxhtml.tostring(b.DIV(*meta_html)) 247 | 248 | return view_tmpl.render( 249 | title = 'Document View', 250 | doc = d, 251 | content = content, 252 | meta = meta) 253 | else: 254 | return view_tmpl.render( 255 | title = 'Document View', 256 | doc = d, 257 | content = 'Nothing to render', 258 | meta = '') 259 | else: 260 | raise cherrypy.HTTPError(400, 'No such action') 261 | 262 | doc.exposed = True 263 | 264 | def get_json(self): 265 | cl = cherrypy.request.headers['Content-Length'] 266 | rawbody = cherrypy.request.body.read(int(cl)) 267 | return simplejson.loads(rawbody) 268 | 269 | def _render(self, id): 270 | d = Document(self._redis, id = id) 271 | if not d.content: 272 | return False 273 | 274 | selections = [] 275 | for sel in self._txt_selections.from_document_id(d.id): 276 | t = (sel.start, sel.end, sel) 277 | selections.append(t) 278 | 279 | if len(selections) == 0: 280 | return False 281 | 282 | selections.sort() 283 | 284 | # check for overlapping selections 285 | # we create sets with all string positions 286 | # and check for any intersections between them 287 | # if they overlap, remove 288 | removed = [] 289 | last = set() 290 | for index, (start, end, sel) in enumerate(selections): 291 | current = set(range(start,end)) 292 | intersec = last.intersection(current) 293 | if intersec != current and len(intersec) > 0: 294 | removed.append(selections.pop(index)) 295 | last = current 296 | 297 | matched = set() 298 | max = len(d.content) 299 | # every possible char position 300 | everything = set(range(0,max)) 301 | 302 | # every char position of all selected ranges 303 | for start, end, sel in selections: 304 | matched = matched.union(set(range(start,end))) 305 | 306 | # positions of all unmatched chars 307 | unmatched = list(everything.difference(matched)) 308 | 309 | if len(unmatched) > 0: 310 | untouched = [] # list of untouched ranges 311 | unmatch_last_pos = 0 312 | unmatch_start = unmatched[0] # set position of the first unmatched char 313 | unmatch_last = unmatched[-1] # set position of the last unmatched char 314 | 315 | # this is detecting a gap in a range 316 | # only add if we have a real range, not just a single position 317 | for unmatch_pos in unmatched: 318 | if unmatch_last_pos > 0 and unmatch_pos != unmatch_last_pos + 1: 319 | if unmatch_start < unmatch_last_pos: 320 | untouched.append((unmatch_start,unmatch_last_pos,None)) 321 | unmatch_start = unmatch_pos 322 | unmatch_last_pos = unmatch_pos 323 | 324 | # last unmatched range 325 | if unmatch_start < unmatch_last: 326 | untouched.append((unmatch_start,unmatch_last,None)) 327 | 328 | print "Selections" 329 | print selections 330 | print "Removed" 331 | print removed 332 | print "untouched" 333 | print untouched 334 | 335 | elements = selections + untouched 336 | elements.sort() 337 | return elements 338 | 339 | 340 | class Rest(object): 341 | def __init__(self, redis): 342 | self._redis = redis 343 | self._txt_selections = TextSelections(self._redis) 344 | self._html_selections = HtmlSelections(self._redis) 345 | self._triples = Triples(self._redis) 346 | 347 | def get_json(self): 348 | cl = cherrypy.request.headers['Content-Length'] 349 | rawbody = cherrypy.request.body.read(int(cl)) 350 | return simplejson.loads(rawbody) 351 | 352 | @cherrypy.tools.jsonify() 353 | def selection(self,action,id): 354 | d = Document(self._redis, id = id) 355 | if not d.content: 356 | return "No such document" 357 | 358 | if action == 'list': 359 | selections = [] 360 | for s in self._txt_selections.from_document_id(d.id): 361 | t = (s.start, s.end, s.ref) 362 | selections.append(t) 363 | print sorted(selections, reverse=True) 364 | return sorted(selections, reverse=True) 365 | 366 | elif action == 'add': 367 | j = self.get_json() 368 | start = j.get('start',0) 369 | end = j.get('end',0) 370 | ref = j.get('ref','') 371 | 372 | if start == 0 and end == 0: 373 | return "Nothing selected" 374 | 375 | if len(d.content[start:end+1].strip()) == 0: 376 | return "Empty string selected" 377 | 378 | txtsel = TextSelection(d.id, start, end+1, ref) 379 | txtsel.save(self._redis) 380 | del txtsel 381 | 382 | return d.content[start:end+1] 383 | 384 | elif action == 'delete': 385 | j = self.get_json() 386 | start = j.get('start',0) 387 | end = j.get('end',0) 388 | ref = j.get('ref','') 389 | 390 | if start == 0 and end == 0: 391 | return "Nothing selected" 392 | 393 | return "OK" 394 | elif action == 'update': 395 | return "OK" 396 | else: 397 | return "Nothing to do" 398 | 399 | selection.exposed = True 400 | 401 | 402 | @cherrypy.tools.jsonify() 403 | def triple(self,action,id): 404 | d = Document(self._redis, id) 405 | if not d.content: 406 | return "No such document" 407 | 408 | if action == 'add': 409 | j = self.get_json() 410 | sub = j.get('s','') 411 | pre = j.get('p','') 412 | obj = j.get('o','') 413 | start = j.get('start',0) 414 | end = j.get('end',0) 415 | 416 | scheme, host, path, query, param = urlsplit(sub) 417 | 418 | print sub, pre, obj, start, end 419 | tsel = TextSelection(docid = d.id) 420 | tsel.from_url(sub) 421 | 422 | sub = '%s/t%se%s' % (sub, start, end) 423 | trip = Triple(sub, pre, obj) 424 | tid = trip.save(self._redis) 425 | 426 | h = HtmlSelection(d.id, sub, start, end, tid) 427 | h.save(self._redis) 428 | 429 | # save the object relation to this document 430 | d.add_relation(pre, obj) 431 | 432 | s = tsel.start + start 433 | e = tsel.start + end 434 | return d.content[s:e] 435 | 436 | 437 | if action == 'list': 438 | tl = set() 439 | for h in self._html_selections.from_document_id(d.id): 440 | for t in self._triples.from_subject(h.node): 441 | tl.add((t.subject, h.start, h.end, t.pre, t.object)) 442 | 443 | # JS fails to apply the selection with DOM errors, 444 | # if they come in the wrong order. 445 | # Sort by subject (the DOM node as url) 446 | # and the start of the selection inside the node 447 | def subject_sort(t): 448 | scheme, netloc, path, query, fragment = urlsplit(t[0]) 449 | return (fragment.split('/')[0], t[1]) 450 | 451 | # Reversed since we need to build the page bottom to top 452 | # in JS. This avoids problems with the offset inside 453 | # a DOM node. 454 | print sorted(tl, reverse=True, key=subject_sort) 455 | return sorted(tl, reverse=True, key=subject_sort) 456 | else: 457 | return "Nothing to do" 458 | 459 | triple.exposed = True 460 | 461 | 462 | def main(): 463 | config = { 464 | '/static': { 465 | 'tools.staticdir.on': True, 466 | 'tools.staticdir.dir': STATICDIR 467 | }, 468 | '/data': { 469 | 'tools.staticdir.on': True, 470 | 'tools.staticdir.dir': CURDIR + '/data' 471 | }, 472 | '/': { 473 | 'tools.sessions.on': True 474 | } 475 | } 476 | _redis = redis.Redis() 477 | try: 478 | _redis.ping() 479 | except redis.exceptions.ConnectionError: 480 | print "please start redis" 481 | exit(1) 482 | 483 | cherrypy.tree.mount(Andoc(_redis), '/', config) 484 | cherrypy.tree.mount(Rest(_redis), '/rest/', config) 485 | 486 | if hasattr(cherrypy.engine, 'block'): 487 | # 3.1 syntax 488 | cherrypy.engine.start() 489 | cherrypy.engine.block() 490 | else: 491 | # 3.0 syntax 492 | cherrypy.server.quickstart() 493 | cherrypy.engine.start() 494 | 495 | if __name__ == "__main__": 496 | main() 497 | -------------------------------------------------------------------------------- /concept-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/endpnt/andoc/e4cba7995865ca7a49e6a573518f1de7d88dbe3f/concept-diagram.png -------------------------------------------------------------------------------- /data/1.txt: -------------------------------------------------------------------------------- 1 | 2 | I 3 | 4 | Steventon: Saturday (January 9). 5 | 6 | In the first place I hope you will live twenty-three years longer. 7 | Mr. Tom Lefroy's birthday was yesterday, so that you are very near 8 | of an age. 9 | 10 | After this necessary preamble I shall proceed to inform you that we 11 | had an exceeding good ball last night, and that I was very much 12 | disappointed at not seeing Charles Fowle of the party, as I had 13 | previously heard of his being invited. In addition to our set at 14 | the Harwoods' ball, we had the Grants, St. Johns, Lady Rivers, her 15 | three daughters and a son, Mr. and Miss Heathcote, Mrs. Lefevre, 16 | two Mr. Watkins, Mr. J. Portal, Miss Deanes, two Miss Ledgers, and 17 | a tall clergyman who came with them, whose name Mary would never 18 | have guessed. 19 | 20 | We were so terrible good as to take James in our carriage, though 21 | there were three of us before, but indeed he deserves encouragement 22 | for the very great improvement which has lately taken place in his 23 | dancing. Miss Heathcote is pretty, but not near so handsome as I 24 | expected. Mr. H. began with Elizabeth, and afterwards danced with 25 | her again; but _they_ do not know how _to be particular_. I flatter 26 | myself, however, that they will profit by the three successive 27 | lessons which I have given them. 28 | 29 | You scold me so much in the nice long letter which I have this 30 | moment received from you, that I am almost afraid to tell you how 31 | my Irish friend and I behaved. Imagine to yourself everything most 32 | profligate and shocking in the way of dancing and sitting down 33 | together. I _can_ expose myself however, only _once more_, because 34 | he leaves the country soon after next Friday, on which day we _are_ 35 | to have a dance at Ashe after all. He is a very gentlemanlike, 36 | good-looking, pleasant young man, I assure you. But as to our 37 | having ever met, except at the three last balls, I cannot say much; 38 | for he is so excessively laughed at about me at Ashe, that he is 39 | ashamed of coming to Steventon, and ran away when we called on 40 | Mrs. Lefroy a few days ago. 41 | 42 | We left Warren at Dean Gate, in our way home last night, and he is 43 | now on his road to town. He left his love, &c., to you, and I will 44 | deliver it when we meet. Henry goes to Harden to-day in his way to 45 | his Master's degree. We shall feel the loss of these two most 46 | agreeable young men exceedingly, and shall have nothing to console 47 | us till the arrival of the Coopers on Tuesday. As they will stay 48 | here till the Monday following, perhaps Caroline will go to the 49 | Ashe ball with me, though I dare say she will not. 50 | 51 | I danced twice with Warren last night, and once with Mr. Charles 52 | Watkins, and, to my inexpressible astonishment, I entirely escaped 53 | John Lyford. I was forced to fight hard for it, however. We had a 54 | very good supper, and the greenhouse was illuminated in a very 55 | elegant manner. 56 | 57 | We had a visit yesterday morning from Mr. Benjamin Portal, whose 58 | eyes are as handsome as ever. Everybody is extremely anxious for 59 | your return, but as you cannot come home by the Ashe ball, I am 60 | glad that I have not fed them with false hopes. James danced with 61 | Alithea, and cut up the turkey last night with great perseverance. 62 | You say nothing of the silk stockings; I flatter myself, therefore, 63 | that Charles has not purchased any, as I cannot very well afford to 64 | pay for them; all my money is spent in buying white gloves and pink 65 | persian. I wish Charles had been at Manydown, because he would have 66 | given you some description of my friend, and I think you must be 67 | impatient to hear something about him. 68 | 69 | Henry is still hankering after the Regulars, and as his project of 70 | purchasing the adjutancy of the Oxfordshire is now over, he has got 71 | a scheme in his head about getting a lieutenancy and adjutancy in 72 | the 86th, a new-raised regiment, which he fancies will be ordered 73 | to the Cape of Good Hope. I heartily hope that he will, as usual, 74 | be disappointed in this scheme. We have trimmed up and given away 75 | all the old paper hats of Mamma's manufacture; I hope you will not 76 | regret the loss of yours. 77 | 78 | After I had written the above, we received a visit from Mr. Tom 79 | Lefroy and his cousin George. The latter is really very 80 | well-behaved now; and as for the other, he has but one fault, which 81 | time will, I trust, entirely remove -- it is that his morning coat 82 | is a great deal too light. He is a very great admirer of Tom Jones, 83 | and therefore wears the same coloured clothes, I imagine, which _he_ 84 | did when he was wounded. 85 | 86 | _Sunday._ -- By not returning till the 19th, you will exactly 87 | contrive to miss seeing the Coopers, which I suppose it is your 88 | wish to do. We have heard nothing from Charles for some time. One 89 | would suppose they must have sailed by this time, as the wind is so 90 | favourable. What a funny name Tom has got for his vessel! But he 91 | has no taste in names, as we well know, and I dare say he 92 | christened it himself. I am sorry for the Beaches' loss of their 93 | little girl, especially as it is the one so much like me. 94 | 95 | I condole with Miss M. on her losses and with Eliza on her gains, 96 | and am ever yours, 97 | 98 | J. A. 99 | 100 | To Miss Austen, 101 | Rev. Mr. Fowle's, Kintbury, Newbury. 102 | 103 | -------------------------------------------------------------------------------- /data/2.txt: -------------------------------------------------------------------------------- 1 | 2 | II 3 | 4 | Steventon: Thursday (January 16) 5 | 6 | I have just received yours and Mary's letter, and I thank you both, 7 | though their contents might have been more agreeable. I do not at 8 | all expect to see you on Tuesday, since matters have fallen out so 9 | pleasantly; and if you are not able to return till after that day, 10 | it will hardly be possible for us to send for you before Saturday, 11 | though for my own part I care so little about the ball that it 12 | would be no sacrifice to me to give it up for the sake of seeing 13 | you two days earlier. We are extremely sorry for poor Eliza's 14 | illness. I trust, however, that she has continued to recover since 15 | you wrote, and that you will none of you be the worse for your 16 | attendance on her. What a good-for-nothing fellow Charles is to 17 | bespeak the stockings! I hope he will be too hot all the rest of 18 | his life for it! 19 | 20 | I sent you a letter yesterday to Ibthorp, which I suppose you will 21 | not receive at Kintbury. It was not very long or very witty, and 22 | therefore if you never receive it, it does not much signify. I 23 | wrote principally to tell you that the Coopers were arrived and in 24 | good health. The little boy is very like Dr. Cooper, and the little 25 | girl is to resemble Jane, they say. 26 | 27 | Our party to Ashe to-morrow night will consist of Edward Cooper, 28 | James (for a ball is nothing without _him_), Buller, who is now 29 | staying with us, and I. I look forward with great impatience to it, 30 | as I rather expect to receive an offer from my friend in the course 31 | of the evening. I shall refuse him, however, unless he promises to 32 | give away his white coat. 33 | 34 | I am very much flattered by your commendation of my last letter, 35 | for I write only for fame, and without any view to pecuniary 36 | emolument. 37 | 38 | Edward is gone to spend the day with his friend, John Lyford, and 39 | does not return till to-morrow. Anna is now here; she came up in 40 | her chaise to spend the day with her young cousins, but she does 41 | not much take to them or to anything about them, except Caroline's 42 | spinning-wheel. I am very glad to find from Mary that Mr. and 43 | Mrs. Fowle are pleased with you. I hope you will continue to give 44 | satisfaction. 45 | 46 | How impertinent you are to write to me about Tom, as if I had not 47 | opportunities of hearing from him myself! The _last_ letter that I 48 | received from him was dated on Friday, 8th, and he told me that if 49 | the wind should be favourable on Sunday, which it proved to be, 50 | they were to sail from Falmouth on that day. By this time, 51 | therefore, they are at Barbadoes, I suppose. The Rivers are still 52 | at Manydown, and are to be at Ashe to-morrow. I intended to call on 53 | the Miss Biggs yesterday had the weather been tolerable. Caroline, 54 | Anna, and I have just been devouring some cold souse, and it would 55 | be difficult to say which enjoyed it most. 56 | 57 | Tell Mary that I make over Mr. Heartley and all his estate to her 58 | for her sole use and benefit in future, and not only him, but all 59 | my other admirers into the bargain wherever she can find them, even 60 | the kiss which C. Powlett wanted to give me, as I mean to confine 61 | myself in future to Mr. Tom Lefroy, for whom I don't care sixpence. 62 | Assure her also, as a last and indubitable proof of Warren's 63 | indifference to me, that he actually drew that gentleman's picture 64 | for me, and delivered it to me without a sigh. 65 | 66 | _Friday._ -- At length the day is come on which I am to flirt my 67 | last with Tom Lefroy, and when you receive this it will be over. My 68 | tears flow as I write at the melancholy idea. Wm. Chute called here 69 | yesterday. I wonder what he means by being so civil. There is a 70 | report that Tom is going to be married to a Lichfield lass. John 71 | Lyford and his sister bring Edward home today, dine with us, and we 72 | shall all go together to Ashe. I understand that we are to draw for 73 | partners. I shall be extremely impatient to hear from you again, 74 | that I may know how Eliza is, and when you are to return. 75 | 76 | With best love, &c., I am affectionately yours, 77 | 78 | J. AUSTEN. 79 | 80 | Miss Austen, 81 | The Rev. Mr. Fowle's, Kintbury, Newbury. 82 | 83 | -------------------------------------------------------------------------------- /data/3.txt: -------------------------------------------------------------------------------- 1 | 2 | III 3 | 4 | Cork Street: Tuesday morn (August, 1796). 5 | 6 | MY DEAR CASSANDRA, 7 | 8 | Here I am once more in this scene of dissipation and vice, and I 9 | begin already to find my morals corrupted. We reached Staines 10 | yesterday, I do not (know) when, without suffering so much from the 11 | heat as I had hoped to do. We set off again this morning at seven 12 | o'clock, and had a very pleasant drive, as the morning was cloudy 13 | and perfectly cool. I came all the way in the chaise from Hertford 14 | Bridge. 15 | 16 | Edward and Frank are both gone out to seek their fortunes; the 17 | latter is to return soon and help us seek ours. The former we shall 18 | never see again. We are to be at Astley's to-night, which I am glad 19 | of. Edward has heard from Henry this morning. He has not been at 20 | the races at all, unless his driving Miss Pearson over to Rowling 21 | one day can be so called. We shall find him there on Thursday. 22 | 23 | I hope you are all alive after our melancholy parting yesterday, 24 | and that you pursued your intended avocation with success. God 25 | bless you! I must leave off, for we are going out. 26 | 27 | Yours very affectionately, 28 | 29 | J. AUSTEN. 30 | 31 | Everybody's love. 32 | 33 | 34 | -------------------------------------------------------------------------------- /data/4.txt: -------------------------------------------------------------------------------- 1 | 2 | IV 3 | 4 | Rowling: Thursday (September 1). 5 | 6 | MY DEAREST CASSANDRA, 7 | 8 | The letter which I have this moment received from you has diverted 9 | me beyond moderation. I could die of laughter at it, as they used 10 | to say at school. You are indeed the finest comic writer of the 11 | present age. 12 | 13 | Since I wrote last, we have been very near returning to Steventon 14 | so early as next week. Such, for a day or two, was our dear brother 15 | Henry's scheme, but at present matters are restored, not to what 16 | they were, for my absence seems likely to be lengthened still 17 | farther. I am sorry for it, but what can I do? 18 | 19 | Henry leaves us to-morrow for Yarmouth, as he wishes very much to 20 | consult his physician there, on whom he has great reliance. He is 21 | better than he was when he first came, though still by no means 22 | well. According to his present plan, he will not return here till 23 | about the 28rd, and bring with him, if he can, leave of absence for 24 | three weeks, as he wants very much to have some shooting at 25 | Godmersham, whither Edward and Elizabeth are to remove very early 26 | in October. If this scheme holds, I shall hardly be at Steventon 27 | before the middle of that month; but if you cannot do without me, I 28 | could return, I suppose, with Frank if he ever goes back. He enjoys 29 | himself here very much, for he has just learnt to turn, and is so 30 | delighted with the employment, that he is at it all day long. 31 | 32 | I am sorry that you found such a conciseness in the strains of my 33 | first letter. I must endeavour to make you amends for it, when we 34 | meet, by some elaborate details, which I shall shortly begin 35 | composing. 36 | 37 | I have had my new gown made up, and it really makes a very superb 38 | surplice. I am sorry to say that my new coloured gown is very much 39 | washed out, though I charged everybody to take great care of it. I 40 | hope yours is so too. Our men had but indifferent weather for their 41 | visit to Godmersham, for it rained great part of the way there and 42 | all the way back. They found Mrs. Knight remarkably well and in 43 | very good spirits. It is imagined that she will shortly be married 44 | again. I have taken little George once in my arms since I have been 45 | here, which I thought very kind. I have told Fanny about the bead 46 | of her necklace, and she wants very much to know where you found 47 | it. 48 | 49 | To-morrow I shall be just like Camilla in Mr. Dubster's 50 | summer-house, for my Lionel will have taken away the ladder by 51 | which I came here, or at least by which I intended to get away, and 52 | here I must stay till his return. My situation, however, is 53 | somewhat preferable to hers, for I am very happy here, though I 54 | should be glad to get home by the end of the month. I have no idea 55 | that Miss Pearson will return with me. 56 | 57 | What a fine fellow Charles is, to deceive us into writing two 58 | letters to him at Cork! I admire his ingenuity extremely, 59 | especially as he is so great a gainer by it. 60 | 61 | Mr. and Mrs. Cage and Mr. and Mrs. Bridges dined with us yesterday. 62 | Fanny seemed as glad to see me as anybody, and inquired very much 63 | after you, whom she supposed to be making your wedding-clothes. She 64 | is as handsome as ever, and somewhat fatter. We had a very pleasant 65 | day, and some _liqueurs_ in the evening. Louisa's figure is very 66 | much improved; she is as stout again as she was. Her face, from 67 | what I could see of it one evening, appeared not at all altered. 68 | She and the gentlemen walked up here on Monday night -- she came in 69 | the morning with the Cages from Hythe. 70 | 71 | Lady Hales, with her two youngest daughters, have been to see us. 72 | Caroline is not grown at all coarser than she was, nor Harriet at 73 | all more delicate. I am glad to hear so good an account of 74 | Mr. Charde, and only fear that my long absence may occasion his 75 | relapse. I practise every day as much as I can -- I wish it were 76 | more for his sake. I have heard nothing of Mary Robinson since I 77 | have been (here). I expect to be well scolded for daring to doubt, 78 | whenever the subject is mentioned. 79 | 80 | Frank has turned a very nice little butterchurn for Fanny. I do not 81 | believe that any of the party were aware of the valuables they had 82 | left behind; nor can I hear anything of Anna's gloves. Indeed I 83 | have not inquired at all about them hitherto. 84 | 85 | We are very busy making Edward's shirts, and I am proud to say that 86 | I am the neatest worker of the party. They say that there are a 87 | prodigious number of birds hereabouts this year, so that perhaps _I_ 88 | may kill a few. I am glad to hear so good an account of Mr. Limprey 89 | and J. Lovett. I know nothing of my mother's handkerchief, but I 90 | dare say I shall find it soon. 91 | 92 | I am very affectionately yours, 93 | 94 | JANE. 95 | 96 | Miss Austen, Steventon, Overton, Hants. 97 | 98 | -------------------------------------------------------------------------------- /data/5.txt: -------------------------------------------------------------------------------- 1 | 2 | V 3 | 4 | Rowling: Monday (September 5). 5 | 6 | MY DEAR CASSANDRA, 7 | 8 | I shall be extremely anxious to hear the event of your ball, and 9 | shall hope to receive so long and minute an account of every 10 | particular that I shall be tired of reading it. Let me know how 11 | many, besides their fourteen selves and Mr. and Mrs. Wright, 12 | Michael will contrive to place about their coach, and how many of 13 | the gentlemen, musicians, and waiters, he will have persuaded to 14 | come in their shooting-jackets. I hope John Lovett's accident will 15 | not prevent his attending the ball, as you will otherwise be 16 | obliged to dance with Mr. Tincton the whole evening. Let me know 17 | how J. Harwood deports himself without the Miss Biggs, and which of 18 | the Marys will carry the day with my brother James. 19 | 20 | We were at a ball on Saturday, I assure you. We dined at 21 | Goodnestone, and in the evening danced two country-dances and the 22 | Boulangeries. I opened the ball with Edward Bridges; the other 23 | couples were Lewis Cage and Harriet, Frank and Louisa, Fanny and 24 | George. Elizabeth played one country-dance, Lady Bridges the other, 25 | which she made Henry dance with her, and Miss Finch played the 26 | Boulangeries. 27 | 28 | In reading over the last three or four lines, I am aware of my 29 | having expressed myself in so doubtful a manner that, if I did not 30 | tell you to the contrary, you might imagine it was Lady Bridges who 31 | made Henry dance with her at the same time that she was playing, 32 | which, if not impossible, must appear a very improbable event to 33 | you. But it was Elizabeth who danced. We supped there, and walked 34 | home at night under the shade of two umbrellas. 35 | 36 | To-day the Goodnestone party begins to disperse and spread itself 37 | abroad. Mr. and Mrs. Cage and George repair to Hythe. Lady Waltham, 38 | Miss Bridges, and Miss Mary Finch to Dover, for the health of the 39 | two former. I have never seen Marianne at all. On Thursday Mr. and 40 | Mrs. Bridges return to Danbury; Miss Harriet Hales accompanies them 41 | to London on her way to Dorsetshire. 42 | 43 | Farmer Claringbould died this morning, and I fancy Edward means to 44 | get some of his farm, if he can cheat Sir Brook enough in the 45 | agreement. 46 | 47 | We have just got some venison from Godmersham, which the two 48 | Mr. Harveys are to dine on to-morrow, and on Friday or Saturday the 49 | Goodnestone people are to finish their scraps. Henry went away on 50 | Friday, as he purposed, _without fayl_. You will hear from him 51 | soon, I imagine, as he talked of writing to Steventon shortly. 52 | Mr. Richard Harvey is going to be married; but as it is a great 53 | secret, and only known to half the neighbourhood, you must not 54 | mention it. The lady's name is Musgrave. 55 | 56 | I am in great distress. I cannot determine whether I shall give 57 | Richis half a guinea or only five shillings when I go away. Counsel 58 | me, amiable Miss Austen, and tell me which will be the most. 59 | 60 | We walked Frank last night to Crixhall Ruff, and he appeared much 61 | edified. Little Edward was breeched yesterday for good and all, and 62 | was whipped into the bargain. 63 | 64 | Pray remember me to everybody who does not inquire after me; those 65 | who do, remember me without bidding. Give my love to Mary Harrison, 66 | and tell her I wish, whenever she is attached to a young man, some 67 | _respectable_ Dr. Marchmont may keep them apart for five volumes. 68 | 69 | . . . . . . . 70 | 71 | 72 | -------------------------------------------------------------------------------- /data/6.txt: -------------------------------------------------------------------------------- 1 | 2 | VI 3 | 4 | Rowling: Thursday (September 15). 5 | 6 | MY DEAR CASSANDRA, 7 | 8 | We have been very gay since I wrote last; dining at Nackington, 9 | returning by moonlight, and everything quite in style, not to 10 | mention Mr. Claringbould's funeral which we saw go by on Sunday. 11 | 12 | I believe I told you in a former letter that Edward had some idea 13 | of taking the name of Clarmgbould; but that scheme is over, though 14 | it would be a very eligible as well as a very pleasant plan, would 15 | anyone advance him money enough to begin on. We rather expected 16 | Mr. Milles to have done so on Tuesday; but to our great surprise 17 | nothing was said on the subject, and unless it is in your power to 18 | assist your brother with five or six hundred pounds, he must 19 | entirely give up the idea. 20 | 21 | At Nackington we met Lady Sondes' picture over the mantel-piece in 22 | the dining-room, and the pictures of her three children in an ante 23 | room, besides Mr. Scott, Miss Fletcher, Mr. Toke, Mr. J. Toke, and 24 | the Archdeacon Lynch. Miss Fletcher and I were very thick, but I am 25 | the thinnest of the two. She wore her purple muslin, which is 26 | pretty enough, though it does not become her complexion. There are 27 | two traits in her character which are pleasing -- namely, she 28 | admires Camilla, and drinks no cream in her tea. If you should ever 29 | see Lucy, you may tell her that I scolded Miss Fletcher for her 30 | negligence in writing, as she desired me to do, but without being 31 | able to bring her to any proper sense of shame -- that Miss 32 | Fletcher says in her defence, that as everybody whom Lucy knew when 33 | she was in Canterbury has now left it, she has nothing at all to 34 | write to her about. By _everybody_, I suppose Miss Fletcher means 35 | that a new set of officers have arrived there. But this is a note 36 | of my own. 37 | 38 | Mrs. Milles, Mr. John Toke, and in short everybody of any 39 | sensibility inquired in tender strains after you, and I took an 40 | opportunity of assuring Mr. J. T. that neither he nor his father 41 | need longer keep themselves single for you. 42 | 43 | We went in our two carriages to Nackington; but how we divided I 44 | shall leave you to surmise, merely observing that, as Elizabeth and 45 | I were without either hat or bonnet, it would not have been very 46 | convenient for us to go in the chaise. We went by Bifrons, and I 47 | contemplated with a melancholy pleasure the abode of him on whom I 48 | once fondly doated. We dine to-day at Goodnestone, to meet my Aunt 49 | Fielding from Margate and a Mr. Clayton, her professed admirer -- 50 | at least so I imagine. Lady Bridges has received very good accounts 51 | of Marianne, who is already certainly the better for her bathing. 52 | 53 | So His Royal Highness Sir Thomas Williams has at length sailed; the 54 | papers say "on a cruise." But I hope they are gone to Cork, or I 55 | shall have written in vain. Give my love to Jane, as she arrived at 56 | Steventon yesterday, I dare say. 57 | 58 | I sent a message to Mr. Digweed from Edward in a letter to Mary 59 | Lloyd which she ought to receive to-day; but as I know that the 60 | Harwoods are not very exact as to their letters, I may as well 61 | repeat it to you. Mr. Digweed is to be informed that illness has 62 | prevented Seward's coming over to look at the repairs intended at 63 | the farm, but that he will come as soon as he can. Mr. Digweed may 64 | also be informed, if you think proper, that Mr. and Mrs. Milles are 65 | to dine here to-morrow, and that Mrs. Joan Knatchbull is to be 66 | asked to meet them. Mr. Richard Harvey's match is put off till he 67 | has got a better Christian name, of which he has great hopes. 68 | 69 | Mr. Children's two sons are both going to be married, John and 70 | George. They are to have one wife between them, a Miss Holwell, who 71 | belongs to the Black Hole at Calcutta. I depend on hearing from 72 | James very soon; he promised me an account of the ball, and by this 73 | time he must have collected his ideas enough after the fatigue of 74 | dancing to give me one. 75 | 76 | Edward and Fly went out yesterday very early in a couple of 77 | shooting jackets, and came home like a couple of bad shots, for 78 | they killed nothing at all. They are out again to-day, and are not 79 | yet returned. Delightful sport! They are just come home, Edward 80 | with his two brace, Frank with his two and a half. What amiable 81 | young men! 82 | 83 | _Friday._ -- Your letter and one from Henry are just come, and the 84 | contents of both accord with my scheme more than I had dared 85 | expect. In one particular I could wish it otherwise, for Henry is 86 | very indifferent indeed. You must not expect us quite so early, 87 | however, as Wednesday, the 20th -- on that day se'nnight, according 88 | to our present plan, we may be with you. Frank had never any idea 89 | of going away before Monday, the 26th. I shall write to Miss Mason 90 | immediately and press her returning with us, which Henry thinks 91 | very likely and particularly eligible. 92 | 93 | Buy Mary Harrison's gown by all means. You shall have mine for ever 94 | so much money, though, if I am tolerably rich when I get home, I 95 | shall like it very much myself. 96 | 97 | As to the mode of our travelling to town, _I_ want to go in a 98 | stage-coach, but Frank will not let me. As you are likely to have 99 | the Williams and Lloyds with you next week, you would hardly find 100 | room for us then. If anyone wants anything in town, they must send 101 | their commissions to Frank, as _I_ shall merely pass through it. 102 | The tallow-chandler is Penlington, at the Crown and Beehive, 103 | Charles Street, Covent Garden. 104 | 105 | Miss Austen, Steventon, Overton, Hants. 106 | 107 | 108 | -------------------------------------------------------------------------------- /data/7.txt: -------------------------------------------------------------------------------- 1 | 2 | VII 3 | 4 | Rowling: Sunday (September 18.) 5 | 6 | MY DEAR CASSANDRA, 7 | 8 | This morning has been spent in doubt and deliberation, in forming 9 | plans and removing difficulties, for it ushered in the day with an 10 | event which I had not intended should take place so soon by a week. 11 | Frank has received his appointment on board the "Captain John 12 | Gore," commanded by the "Triton," and will therefore be obliged to 13 | be in town on Wednesday; and though I have every disposition in the 14 | world to accompany him on that day, I cannot go on the uncertainty 15 | of the Pearsons being at home, as I should not have a place to go 16 | to in case they were from home. 17 | 18 | I wrote to Miss P. on Friday, and hoped to receive an answer from 19 | her this morning, which would have rendered everything smooth and 20 | easy, and would have enabled us to leave this place to-morrow, as 21 | Frank, on first receiving his appointment, intended to do. He 22 | remains till Wednesday merely to accommodate me. I have written to 23 | her again to-day, and desired her to answer it by return of post. 24 | On Tuesday, therefore, I shall positively know whether they can 25 | receive me on Wednesday. If they cannot, Edward has been so good as 26 | to promise to take me to Greenwich on the Monday following, which 27 | was the day before fixed on, if that suits them better. If I have 28 | no answer at all on Tuesday, I must suppose Mary is not at home, 29 | and must wait till I _do_ hear, as, after having invited her to go 30 | to Steventon with me, it will not quite do to go home and say no 31 | more about it. 32 | 33 | My father will be so good as to fetch home his prodigal daughter 34 | from town, I hope, unless he wishes me to walk the hospitals, enter 35 | at the Temple, or mount guard at St. James'. It will hardly be in 36 | Frank's power to take me home -- nay, it certainly will not. I 37 | shall write again as soon as I get to Greenwich. 38 | 39 | What dreadful hot weather we have! It keeps one in a continual 40 | state of inelegance. 41 | 42 | If Miss Pearson should return with me, pray be careful not to 43 | expect too much beauty. I will not pretend to say that _on a first 44 | view_ she quite answered the opinion I had formed of her. My 45 | mother, I am sure, will be disappointed if she does not take great 46 | care. From what I remember of her picture, it is no great 47 | resemblance. 48 | 49 | I am very glad that the idea of returning with Frank occurred to 50 | me; for as to Henry's coming into Kent again, the time of its 51 | taking place is so very uncertain that I should be waiting for 52 | _dead men's shoes_. I had once determined to go with Frank 53 | to-morrow and take my chance, &c., but they dissuaded me from so 54 | rash a step, as I really think on consideration it would have been; 55 | for if the Pearsons were not at home, I should inevitably fall a 56 | sacrifice to the arts of some fat woman who would make me drunk 57 | with small beer. 58 | 59 | Mary is brought to bed of a boy -- both doing very well. I shall 60 | leave you to guess what Mary I mean. Adieu, with best love to all 61 | your agreeable inmates. Don't let the Lloyds go on any account 62 | before I return, unless Miss P. is of the party. How ill I have 63 | written! I begin to hate myself. Yours ever, J. AUSTEN. 64 | 65 | The "Triton" is a new 32 frigate just launch at Deptford. Frank is 66 | much pleased with the prospect of having Captain Gore under his 67 | command. 68 | 69 | Miss Austen, Steventon, Overton, Hants. 70 | 71 | -------------------------------------------------------------------------------- /data/doc-struc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/endpnt/andoc/e4cba7995865ca7a49e6a573518f1de7d88dbe3f/data/doc-struc.png -------------------------------------------------------------------------------- /data/event-timeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/endpnt/andoc/e4cba7995865ca7a49e6a573518f1de7d88dbe3f/data/event-timeline.jpg -------------------------------------------------------------------------------- /data/event-tree.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Root Event 1", 3 | "children": [ 4 | { 5 | "name": "Sub Event 2", 6 | "date": "2010-01-01", 7 | "children": [ 8 | { 9 | "name": "Sub-Sub-Event 3", 10 | "children": [ 11 | {"name": "Event 4", "date": "3938"}, 12 | {"name": "Event 5", "date": "3812"}, 13 | {"name": "Event 6", "date": "743"} 14 | ] 15 | }, 16 | { 17 | "name": "Event 7", 18 | "date": "2010", 19 | "children": [ 20 | {"name": "Event 8", "date": "3534"}, 21 | {"name": "Event 9", "date": "5731"} 22 | ] 23 | } 24 | ] 25 | } 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /doc.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | # 14 | 15 | from hashlib import sha1 16 | from rediskeys import * 17 | from os import path 18 | from itertools import izip_longest 19 | 20 | class Document(object): 21 | def __init__(self, redis, id=None, name=None): 22 | self._redis = redis 23 | if id is not None: 24 | self.id = int(id) 25 | else: 26 | self.id = None 27 | 28 | if self.id > 0: 29 | self._load(self.id) 30 | 31 | def add_relation(self, pre, object): 32 | obj_hash = sha1(object).hexdigest() 33 | obj_id = self._redis.get(OBJ_ID % (pre, obj_hash)) 34 | self._redis.zincrby(DOC_RELATED % (self.id, pre), obj_id) 35 | self._redis.incr(DOC_RELATED_COUNT % (self.id, pre)) 36 | 37 | def _load(self, id): 38 | filename = self._redis.get(DOC_FILENAME % int(id)) 39 | if filename and path.exists(filename) and path.isfile(filename): 40 | self.content = open(filename,'r').read(-1) 41 | self.length = len(self.content) 42 | self.filename = filename 43 | else: 44 | self.content = None 45 | self.length = None 46 | self.filename = None 47 | 48 | def add(self, docpath, title = None): 49 | if path.exists(docpath) and path.isfile(docpath): 50 | self.id = int(self._redis.incr(NEXT_DOC_ID)) 51 | self._redis.set(DOC_FILENAME % self.id, docpath) 52 | if title is not None: 53 | self._redis.set(DOC_TITLE % self.id, title) 54 | self._redis.sadd(ALL_DOCS,self.id) 55 | self._load(self.id) 56 | self._redis.set(DOC_LENGTH % self.id, self.length) 57 | 58 | return True 59 | else: 60 | return False 61 | 62 | class Documents(object): 63 | def __init__(self, redis): 64 | self._redis = redis 65 | 66 | def get_all(self): 67 | return self._redis.smembers(ALL_DOCS) 68 | 69 | def get_list(self): 70 | pipe = self._redis.pipeline() 71 | pipe.sort( 72 | ALL_DOCS, 73 | by = 'nosort', 74 | get = [ 75 | '#', # that is the id itself 76 | DOC_TITLE % '*', 77 | DOC_DATE % '*', 78 | DOC_RELATED_COUNT % ('*','event'), 79 | DOC_RELATED_COUNT % ('*','place'), 80 | DOC_RELATED_COUNT % ('*','person'), 81 | DOC_RELATED_COUNT % ('*','date'), 82 | ] 83 | ) 84 | res = pipe.execute() 85 | print res[0] 86 | res_by = [ a for a in [iter(res[0])] * 7 ] 87 | docs = [ dict({'id': id, 88 | 'title': title, 89 | 'date': date, 90 | 'event_count': evc, 91 | 'place_count': plc, 92 | 'person_count': pec, 93 | 'date_count': dac 94 | }) \ 95 | for id,title,date,evc,plc,pec,dac in izip_longest(*res_by) ] 96 | 97 | print docs 98 | return docs 99 | 100 | -------------------------------------------------------------------------------- /featext.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # - read and calc feature data from redis 3 | # - calc graph 4 | 5 | from rediskeys import * 6 | from redis import Redis 7 | from urlparse import urlsplit 8 | from sys import exit 9 | from itertools import combinations 10 | import igraph 11 | 12 | redis = Redis() 13 | pipe = redis.pipeline() 14 | 15 | # load all document ids 16 | document_ids = list(redis.smembers(ALL_DOCS)) 17 | 18 | # load all related person->objects for each document 19 | for docid in document_ids: 20 | pipe.zrange(DOC_RELATED % (docid, 'person'), 0,-1, withscores=True) 21 | 22 | tmp_obj_score = pipe.execute() 23 | 24 | # save obj and score 25 | object_score = dict() 26 | object_ids = set() 27 | # save doc -> obj relation 28 | doc_obj_map = dict() 29 | for result_set in tmp_obj_score: 30 | docid = document_ids.pop(0) 31 | doc_obj_map[int(docid)] = set( [ int(k) - 1 for k,v in result_set ]) 32 | object_score.update([ (int(k) - 1 ,v) for k,v in result_set]) 33 | object_ids.update(set([int(k) for k,v in result_set])) 34 | 35 | # load all values for each obj_id 36 | for obj_id in object_score.iterkeys(): 37 | pipe.get(OBJECT_VALUE % ('person',obj_id)) 38 | 39 | tmp_obj_value = pipe.execute() 40 | print "%s obj ids" % len(object_ids) 41 | print "%s obj values in redis" % len(tmp_obj_value) 42 | 43 | # load all object values 44 | object_value = dict() 45 | for obj_id in object_score.iterkeys(): 46 | object_value[obj_id] = tmp_obj_value.pop(0) 47 | 48 | print "%s docs" % len(doc_obj_map) 49 | print "%s scores" % len(object_score) 50 | print "%s values" % len(object_value) 51 | 52 | # create a relation between objects in the same document 53 | obj_rel_map = list() 54 | for obj_ids in doc_obj_map.itervalues(): 55 | if len(obj_ids) > 0: 56 | obj_rel_map.extend([ v for v in combinations(obj_ids,2) ]) 57 | 58 | print "%s obj rel" % len(obj_rel_map) 59 | 60 | # count the relations over all documents 61 | obj_rel_count = [ [e, obj_rel_map.count(e)] for e in set(obj_rel_map) ] 62 | 63 | print "%s obj count" % len(obj_rel_count) 64 | 65 | # finally split relation (edges) and count (weight) 66 | edges, weight = map(list, zip(*obj_rel_count)) 67 | 68 | print "%s edges" % len(edges) 69 | print "%s weight" % len(weight) 70 | 71 | g = igraph.Graph(edges) 72 | g.es['weight'] = weight 73 | g.vs['label'] = object_value.values() 74 | g.vs['id'] = list(object_ids) 75 | g.vs['score'] = object_score.values() 76 | 77 | print g 78 | g.save('data/person.gml') 79 | -------------------------------------------------------------------------------- /graph2redis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # - load graph 3 | # - render layout x,y postions 4 | # - save positions in redis 5 | 6 | from rediskeys import * 7 | from redis import Redis 8 | from urlparse import urlsplit 9 | from sys import exit 10 | from itertools import combinations 11 | import igraph 12 | import math 13 | import string 14 | 15 | redis = Redis() 16 | pipe = redis.pipeline() 17 | 18 | g = igraph.load('data/person.gml') 19 | 20 | layout = g.layout_fruchterman_reingold(weights='weight', maxiter=3000) 21 | 22 | # calc graph size based on number of vertices 23 | width = len(g.vs) * 20 24 | height = len(g.vs) * 20 25 | 26 | if width < 800: 27 | width = 800 28 | if height < 400: 29 | height = 400 30 | 31 | labels = g.vs.get_attribute_values('label') 32 | ids = g.vs.get_attribute_values('id') 33 | 34 | # from igraph.Graph.write_svg 35 | vertex_size = 10 36 | 37 | maxs = [ layout[0][dim] for dim in range(2) ] 38 | mins = [ layout[0][dim] for dim in range(2) ] 39 | 40 | for rowidx in range(1, len(layout)): 41 | row = layout[rowidx] 42 | for dim in range(0, 2): 43 | if maxs[dim] < row[dim]: maxs[dim] = row[dim] 44 | if mins[dim] > row[dim]: mins[dim] = row[dim] 45 | 46 | sizes = [ width-2*vertex_size, height-2*vertex_size] 47 | halfsizes= [ (maxs[dim] + mins[dim])/2.0 for dim in range(2) ] 48 | ratios = [ sizes[dim] / (maxs[dim] - mins[dim]) for dim in range(2) ] 49 | layout = [[ (row[0] - halfsizes[0]) * ratios[0], \ 50 | (row[1] - halfsizes[1]) * ratios[1]] \ 51 | for row in layout] 52 | 53 | edges = [] 54 | vertices = [] 55 | 56 | for eidx, edge in enumerate(g.es): 57 | vidxs = edge.tuple 58 | x1 = layout[vidxs[0]][0] 59 | y1 = layout[vidxs[0]][1] 60 | x2 = layout[vidxs[1]][0] 61 | y2 = layout[vidxs[1]][1] 62 | angle = math.atan2(y2-y1, x2-x1) 63 | x2 = x2 - vertex_size * math.cos(angle) 64 | y2 = y2 - vertex_size * math.sin(angle) 65 | edges.append( 66 | {'x1': '%.4f' % x1, 67 | 'y1': '%.4f' % y1, 68 | 'x2': '%.4f' % x2, 69 | 'y2': '%.4f' % y2 }) 70 | 71 | for vidx in range(g.vcount()): 72 | tmpd = { 73 | 'x': '%.4f' % layout[vidx][0], 74 | 'y': '%.4f' % layout[vidx][1], 75 | 'id': str(ids[vidx]), 76 | 'label': str(labels[vidx]) 77 | } 78 | vertices.append(tmpd) 79 | 80 | #print edges 81 | #print vertices 82 | 83 | # get the old list of ids first 84 | old_layout_edges = redis.smembers(LAYOUT_EDGES) 85 | old_layout_vertices = redis.smembers(LAYOUT_VERTICES) 86 | 87 | # get a pipe 88 | pipe = redis.pipeline() 89 | 90 | # delete the old ids 91 | pipe.delete(LAYOUT_EDGES) 92 | pipe.delete(LAYOUT_VERTICES) 93 | 94 | pipe.set(LAYOUT_WIDTH, width) 95 | pipe.set(LAYOUT_HEIGHT, height) 96 | 97 | # set edges 98 | for edge in edges: 99 | edge_id = redis.incr(LAYOUT_NEXT_EDGE_ID) 100 | pipe.set(LAYOUT_EDGE_POS_X1 % edge_id, edge['x1']) 101 | pipe.set(LAYOUT_EDGE_POS_X2 % edge_id, edge['x2']) 102 | pipe.set(LAYOUT_EDGE_POS_Y1 % edge_id, edge['y1']) 103 | pipe.set(LAYOUT_EDGE_POS_Y2 % edge_id, edge['y2']) 104 | # add egde id to the set 105 | pipe.sadd(LAYOUT_EDGES, edge_id) 106 | 107 | # set vertices 108 | for v in vertices: 109 | v_id = redis.incr(LAYOUT_NEXT_VERTICE_ID) 110 | pipe.set(LAYOUT_VERTICE_POS_X % v_id, v['x']) 111 | pipe.set(LAYOUT_VERTICE_POS_Y % v_id, v['y']) 112 | pipe.set(LAYOUT_VERTICE_OBJ_ID % v_id, v['id']) 113 | pipe.set(LAYOUT_VERTICE_LABEL % v_id, v['label']) 114 | # add vid to the set 115 | pipe.sadd(LAYOUT_VERTICES, v_id) 116 | 117 | # delete old edges 118 | for old_edge_id in old_layout_edges: 119 | pipe.delete(LAYOUT_EDGE_POS_X1 % old_edge_id) 120 | pipe.delete(LAYOUT_EDGE_POS_X2 % old_edge_id) 121 | pipe.delete(LAYOUT_EDGE_POS_Y1 % old_edge_id) 122 | pipe.delete(LAYOUT_EDGE_POS_Y2 % old_edge_id) 123 | 124 | # delete old vertices 125 | for old_vid in old_layout_vertices: 126 | pipe.delete(LAYOUT_VERTICE_POS_X % old_vid) 127 | pipe.delete(LAYOUT_VERTICE_POS_Y % old_vid) 128 | pipe.delete(LAYOUT_VERTICE_OBJ_ID % old_vid) 129 | pipe.delete(LAYOUT_VERTICE_LABEL % old_vid) 130 | 131 | # all in one atomic batch 132 | # and we should not see any empty graph on the UI. 133 | result = pipe.execute() 134 | 135 | #print result, 136 | print "Saved %s edges and %s vertices" % (len(edges), len(vertices)) 137 | 138 | 139 | -------------------------------------------------------------------------------- /import.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | # 16 | 17 | import redis 18 | from os import path, walk 19 | from sys import exit, argv 20 | from doc import * 21 | from rediskeys import * 22 | 23 | def usage(): 24 | print "Andoc Document Import" 25 | print 'Usage: %s DIR [PATTERN]' % argv[0] 26 | print ' - Add all files in DIR' 27 | print ' - Optional PATTERN can be a regex string matching a filename' 28 | 29 | def main(): 30 | if len(argv) < 2: 31 | usage() 32 | exit(0) 33 | 34 | if path.exists(argv[1]) and path.isdir(argv[1]): 35 | search_dir = argv[1] 36 | else: 37 | print "Error: invalid directory" 38 | exit(1) 39 | 40 | filepat = None 41 | if len(argv) == 3: 42 | pattern = argv[2] 43 | try: 44 | import re 45 | filepat = re.compile(r''+pattern) 46 | except re.error: 47 | print "Error: invald regex pattern" 48 | exit(1) 49 | 50 | valid_files = [] 51 | for root, dirs, files in walk(search_dir): 52 | for name in files: 53 | if filepat and re.search(filepat, name) is not None: 54 | valid_files.append(path.join(root, name)) 55 | elif filepat is None: 56 | valid_files.append(path.join(root, name)) 57 | else: 58 | continue 59 | 60 | if len(valid_files) == 0: 61 | print "Error: no files found" 62 | exit(1) 63 | 64 | r = redis.Redis() 65 | for file in valid_files: 66 | doc = Document(r) 67 | if doc.add('%s' % file): 68 | print "%s %s added (%s)" % (doc.filename, doc.id, doc.length) 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /import_maildir.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program. If not, see . 15 | # 16 | 17 | import redis 18 | from email import message_from_file 19 | from email.utils import mktime_tz, parsedate_tz 20 | from os import path, walk 21 | from sys import exit, argv 22 | from doc import * 23 | from selection import * 24 | from rediskeys import * 25 | from triple import * 26 | 27 | def usage(): 28 | print "import raw emails from maildir" 29 | print 'Usage: %s MAILDIR' % argv[0] 30 | 31 | def main(): 32 | if path.exists(argv[1]) and path.isdir(argv[1]): 33 | search_dir = argv[1] 34 | else: 35 | print "Error: invalid directory" 36 | exit(1) 37 | 38 | valid_emails = [] 39 | for root, dirs, files in walk(search_dir): 40 | for name in files: 41 | valid_emails.append(path.join(root, name)) 42 | 43 | if len(valid_emails) == 0: 44 | print "Error: no files found" 45 | exit(1) 46 | 47 | r = redis.Redis() 48 | for email in valid_emails: 49 | msg = message_from_file(open(email)) 50 | 51 | has_plaintext = False 52 | if msg.is_multipart(): 53 | for part in msg.walk(): 54 | if part.get_content_type() == 'text/plain': 55 | has_plaintext = True 56 | plaintext = part.get_payload(decode=True) 57 | else: 58 | if msg.get_content_type() == 'text/plain': 59 | has_plaintext = True 60 | plaintext = msg.get_payload(decode=True) 61 | 62 | 63 | if has_plaintext: 64 | destfile = open('data/%s.txt' % path.basename(email), 'w') 65 | selections = [] 66 | dates = [] 67 | for k,v in msg.items(): 68 | selection_start = destfile.tell() 69 | # web browser counts one char for \r\n 70 | destfile.write('%s: %s\n' % ( 71 | k.replace('\r','').strip(), 72 | v.replace('\r','').strip()) 73 | ) 74 | selection_end = destfile.tell() 75 | selections.append((selection_start, selection_end, 76 | 'http://www.w3.org/1999/xhtml/#div')) 77 | 78 | if k == 'Date': 79 | ts = mktime_tz(parsedate_tz(v)) 80 | ts_start = len('%s: ' %k ) 81 | ts_end = selection_end - selection_start 82 | dates.append( 83 | (selection_start, selection_end, ts, ts_start, ts_end)) 84 | 85 | destfile.write('\n') 86 | bstart = destfile.tell() 87 | destfile.write(plaintext.replace('\r','').strip()) 88 | bend = destfile.tell() 89 | destfile.close() 90 | selections.append((bstart, bend+1, 91 | 'http://www.w3.org/1999/xhtml/#div')) 92 | 93 | doc = Document(r) 94 | if doc.add('data/%s.txt' % path.basename(email)): 95 | for start,end,ref in selections: 96 | text_selection = TextSelection(doc.id, start, end, ref) 97 | text_selection.save(r) 98 | 99 | for s_start, s_end, ts, ts_start, ts_end in dates: 100 | pre = 'date' 101 | sub = '%s%s#%s.s%se%s' % ( 102 | 'http://127.0.0.1:8080/doc/struc/', 103 | doc.id, 'div', s_start, s_end) 104 | # http://127.0.0.1:8080/doc/struc/1#div.s1086e1124/t6e37 105 | trsub = '%s/t%se%s' % (sub, ts_start, ts_end) 106 | trip = Triple(sub, pre, str(ts)) 107 | tid = trip.save(r) 108 | 109 | h = HtmlSelection(doc.id, sub, ts_start, ts_end, tid) 110 | h.save(r) 111 | 112 | # save the object relation to this document 113 | doc.add_relation(pre, str(ts)) 114 | 115 | 116 | if __name__ == "__main__": 117 | if len(argv) < 2: 118 | usage() 119 | exit(0) 120 | main() 121 | -------------------------------------------------------------------------------- /rediskeys.py: -------------------------------------------------------------------------------- 1 | # document id counter 2 | NEXT_DOC_ID='doc:next:id' 3 | 4 | # set of all document ids 5 | ALL_DOCS='doc:list' 6 | 7 | # full document path and filename 8 | DOC_FILENAME='doc:%s:filename' 9 | 10 | # char length of the document 11 | DOC_LENGTH='doc:%s:length' 12 | 13 | # document title 14 | DOC_TITLE='doc:%s:title' 15 | 16 | # doc date 17 | DOC_DATE='doc:%s:date' 18 | 19 | DOC_NAME_BY_ID='doc:name:by:id:%s' 20 | DOC_ID_BY_NAME='doc:id:by:name:%s' 21 | 22 | DOC_RELATED_COUNT='doc:%s:relcnt:%s' 23 | 24 | # sorted set with related object id and score 25 | # for each document 26 | DOC_RELATED='doc:%s:related:%s' 27 | 28 | # key pattern for redis 29 | PRE_SUBJECTS='triple:%s:subjects' 30 | PRE_OBJECTS='triple:%s:objects' 31 | 32 | # hash to retrive the id of pre -> obj 33 | OBJ_ID='triple:%s:obj:%s:id' 34 | 35 | # value of the pre -> obj 36 | OBJECT_VALUE='triple:%s:obj:%s:value' 37 | 38 | # id counter for objects 39 | NEXT_OBJ_ID='triple:next:%s:object:id' 40 | 41 | # counter: layout 42 | LAYOUT_NEXT_EDGE_ID='layout:next:edge:id' 43 | LAYOUT_NEXT_VERTICE_ID='layout:next:vertice:id' 44 | 45 | # set: all current edge and vertice ids 46 | LAYOUT_EDGES='layout:edges' 47 | LAYOUT_VERTICES='layout:vertices' 48 | 49 | LAYOUT_WIDTH='layout:width' 50 | LAYOUT_HEIGHT='layout:height' 51 | 52 | # string: postion 53 | LAYOUT_EDGE_POS_X1='layout:edge:%s:pos:x1' 54 | LAYOUT_EDGE_POS_X2='layout:edge:%s:pos:x2' 55 | LAYOUT_EDGE_POS_Y1='layout:edge:%s:pos:y1' 56 | LAYOUT_EDGE_POS_Y2='layout:edge:%s:pos:y2' 57 | LAYOUT_VERTICE_POS_X='layout:vertice:%s:pos:x' 58 | LAYOUT_VERTICE_POS_Y='layout:vertice:%s:pos:y' 59 | # meta data 60 | LAYOUT_VERTICE_LABEL='layout:vertice:%s:label' 61 | LAYOUT_VERTICE_OBJ_ID='layout:vertice:%s:obj_id' 62 | 63 | -------------------------------------------------------------------------------- /selection.py: -------------------------------------------------------------------------------- 1 | import re, string 2 | from urlparse import urlsplit 3 | 4 | NEXT_TXT_SELECTION = 'next.doc.%s.text.selection' 5 | TXT_SELECTIONS = 'doc:%s:text.selections' 6 | TXT_SELECTION_START = 'doc:%s:text.selection:%s:start' 7 | TXT_SELECTION_END = 'doc:%s:text.selection:%s:end' 8 | TXT_SELECTION_REF = 'doc:%s:text.selection:%s:ref' 9 | 10 | NEXT_HTML_SELECTION = 'next.doc.%s.html.selection' 11 | HTML_SELECTIONS = 'doc:%s:html.selections' 12 | HTML_SELECTION_NODE = 'doc:%s:html.selection:%s:node' 13 | HTML_SELECTION_START = 'doc:%s:html.selection:%s:start' 14 | HTML_SELECTION_END = 'doc:%s:html.selection:%s:end' 15 | HTML_SELECTION_REF = 'doc:%s:html.selection:%s:ref' 16 | 17 | NAMESPACE = { 'http://www.w3.org/1999/xhtml/#h1': 'h1', 18 | 'http://www.w3.org/1999/xhtml/#h2': 'h2', 19 | 'http://www.w3.org/1999/xhtml/#h3': 'h3', 20 | 'http://www.w3.org/1999/xhtml/#h4': 'h4', 21 | 'http://www.w3.org/1999/xhtml/#p': 'p', 22 | 'http://www.w3.org/1999/xhtml/#ul': 'ul', 23 | 'http://www.w3.org/1999/xhtml/#div': 'div', 24 | 'http://www.w3.org/1999/xhtml/#li': 'li', 25 | 'http://www.w3.org/1999/xhtml/#span':'span' } 26 | 27 | MATCH_NODE = r'^(%s)\.s([0-9]+)e([0-9]+)$' % string.join(NAMESPACE.values(), '|') 28 | 29 | class TextSelection(object): 30 | def __init__(self, docid = None, start = None, end = None, ref = None): 31 | self.docid = int(docid) 32 | self.start = start 33 | self.end = end 34 | self.ref = ref 35 | 36 | 37 | def _valid(self): 38 | # TODO 39 | return True 40 | 41 | def save(self, redis): 42 | if self._valid(): 43 | selection_id = redis.incr(NEXT_TXT_SELECTION % self.docid) 44 | pipe = redis.pipeline() 45 | pipe.set(TXT_SELECTION_START % \ 46 | (self.docid, selection_id), self.start) 47 | pipe.set(TXT_SELECTION_END % \ 48 | (self.docid, selection_id), self.end) 49 | pipe.set(TXT_SELECTION_REF % \ 50 | (self.docid, selection_id), self.ref) 51 | pipe.lpush(TXT_SELECTIONS % \ 52 | self.docid, selection_id) 53 | result = pipe.execute() 54 | return selection_id 55 | else: 56 | return False 57 | 58 | def from_url(self, url): 59 | scheme, host, path, query, param = urlsplit(url) 60 | p = re.compile(MATCH_NODE) 61 | m = re.search(p, param) 62 | if m is not None: 63 | self.rel = m.group(1) 64 | self.start = int(m.group(2)) 65 | self.end = int(m.group(3)) 66 | 67 | 68 | class HtmlSelection(object): 69 | def __init__(self, docid = None, node = None, 70 | start = None, end = None, ref = None): 71 | self.docid = docid 72 | self.node = node 73 | self.start = start 74 | self.end = end 75 | self.ref = ref 76 | 77 | def _valid(self): 78 | #TODO 79 | return True 80 | 81 | def save(self, redis): 82 | if self._valid(): 83 | selection_id = redis.incr(NEXT_HTML_SELECTION % self.docid) 84 | pipe = redis.pipeline() 85 | pipe.set(HTML_SELECTION_NODE % \ 86 | (self.docid, selection_id), self.node) 87 | pipe.set(HTML_SELECTION_START % \ 88 | (self.docid, selection_id), self.start) 89 | pipe.set(HTML_SELECTION_END % \ 90 | (self.docid, selection_id), self.end) 91 | pipe.set(HTML_SELECTION_REF % \ 92 | (self.docid, selection_id), self.ref) 93 | pipe.lpush(HTML_SELECTIONS % \ 94 | self.docid, selection_id) 95 | result = pipe.execute() 96 | return selection_id 97 | else: 98 | return False 99 | return True 100 | 101 | class TextSelections(): 102 | def __init__(self, redis): 103 | self._redis = redis 104 | pass 105 | 106 | def from_document_id(self, docid): 107 | result = [] 108 | selections = self._redis.lrange(TXT_SELECTIONS % docid, 0, -1) 109 | if len(selections) == 0: 110 | return result 111 | pipe = self._redis.pipeline() 112 | for selection_id in selections: 113 | pipe.get(TXT_SELECTION_START % (docid, selection_id)) 114 | pipe.get(TXT_SELECTION_END % (docid, selection_id)) 115 | pipe.get(TXT_SELECTION_REF % (docid, selection_id)) 116 | start,end,ref = pipe.execute() 117 | t = TextSelection(docid, int(start), int(end), ref) 118 | result.append(t) 119 | return result 120 | 121 | class HtmlSelections(): 122 | def __init__(self, redis): 123 | self._redis = redis 124 | pass 125 | 126 | def from_document_id(self, docid): 127 | result = [] 128 | selections = self._redis.lrange(HTML_SELECTIONS % docid, 0, -1) 129 | if len(selections) == 0: 130 | return result 131 | pipe = self._redis.pipeline() 132 | for selection_id in selections: 133 | pipe.get(HTML_SELECTION_NODE % (docid, selection_id)) 134 | pipe.get(HTML_SELECTION_START % (docid, selection_id)) 135 | pipe.get(HTML_SELECTION_END % (docid, selection_id)) 136 | pipe.get(HTML_SELECTION_REF % (docid, selection_id)) 137 | node,start,end,ref = pipe.execute() 138 | t = HtmlSelection(docid, node, int(start), int(end), ref) 139 | result.append(t) 140 | return result 141 | 142 | -------------------------------------------------------------------------------- /static/andoc.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: black url('back.png') top center no-repeat; 3 | color: white; 4 | margin: 0; padding: 0; 5 | font-family: Verdana; 6 | } 7 | 8 | a { color: white; text-decoration: none; } 9 | a:hover { text-decoration: underline; } 10 | a:visited { color: gray; } 11 | 12 | .c { clear: both; } 13 | 14 | button { cursor: pointer; } 15 | 16 | 17 | #wrapper { margin: 0; padding: 0; min-width: 800px; } 18 | #menu { 19 | position: absolute; 20 | top: 95px; 21 | height: 25px; 22 | padding: 10px 0 0 0; 23 | margin: 0; 24 | left: 3%; 25 | border-left:1px solid #666; 26 | border-right:1px solid #666; 27 | border-top:1px solid #666; 28 | border-top-left-radius: 1em; 29 | border-top-right-radius: 1em; 30 | background-color: rgba(255, 255, 255, 0.2); 31 | -webkit-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 32 | -moz-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 33 | box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 34 | } 35 | 36 | #menu ul, #menu ul li { display: inline; padding: 0; margin: 0; } 37 | #menu a { color: white; text-decoration: none; padding: 1em;} 38 | #menu a:hover { text-shadow: rgba(255, 255, 255, 0.9) 0 0 10px ; } 39 | #menu .active a { font-weight: bold; } 40 | 41 | #search { 42 | position: absolute; 43 | top: 40px; 44 | right: 2%; 45 | } 46 | #search input, #search button { 47 | color: white; 48 | background-color: rgba(0,0,0, 0.2); 49 | } 50 | #search input { 51 | border: 1px solid #666; 52 | border-top-left-radius: .8em; 53 | border-bottom-left-radius: .8em; 54 | width: 200px; 55 | height: 25px; 56 | } 57 | 58 | #search input:focus { 59 | background-color: black; 60 | } 61 | 62 | #search button { 63 | border: 1px solid #666; 64 | border-top-right-radius: .8em; 65 | border-bottom-right-radius: .8em; 66 | height: 25px; 67 | padding: 0 1em; 68 | } 69 | 70 | #events .event button:hover, 71 | #eventlist-action button:hover, 72 | #search button:hover { 73 | text-shadow: rgba(255, 255, 255, 0.9) 0 0 10px ;  74 | background-color: rgba(0,0,0, 0.5); 75 | border: 1px solid white; 76 | } 77 | 78 | #explore { 79 | position: absolute; 80 | top: 30px; 81 | height: 30px; 82 | padding: 12px 0 0 3%; 83 | margin: 0; 84 | width: 96%; 85 | border-top: 1px solid #666; 86 | border-bottom: 1px solid #666; 87 | border-right: 1px solid #666; 88 | border-left: none; 89 | border-top-right-radius: .8em; 90 | border-bottom-right-radius: .8em; 91 | background-color: rgba(255, 255, 255, 0.2); 92 | -webkit-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 93 | -moz-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 94 | box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 95 | } 96 | 97 | #explore ul, #explore ul li { display: inline; padding: 0; margin: 0; } 98 | #explore a { color: white; text-decoration: none; padding: 1em;} 99 | #explore a:hover { text-shadow: rgba(255, 255, 255, 0.9) 0 0 10px ; } 100 | 101 | #explore .active a { font-weight: bold; } 102 | 103 | #head div { display: none; } 104 | #head { height: 150px; margin: 0; padding: 0; } 105 | #head h3 { font-size: 1.3em; padding: 1em; } 106 | 107 | #content, #raw, #struc { 108 | position: absolute; 109 | top: 135px; 110 | left: 1%; 111 | margin: 0 0 1em 0; 112 | padding: 0; 113 | color: black; 114 | border:1px solid #999; 115 | background-color: rgba(255, 255, 255, 0.7); 116 | border-radius: 1em; 117 | -webkit-box-shadow: inset 3px 3px 10px rgba(0, 0, 0, 0.7); 118 | -moz-box-shadow: inset 3px 3px 10px rgba(0, 0, 0, 0.7); 119 | box-shadow: inset 3px 3px 10px rgba(0, 0, 0, 0.7); 120 | overflow: hidden; 121 | } 122 | 123 | #content { width: 60%; } 124 | #raw { width: 85%; } 125 | #struc { width: 85%; } 126 | #hlaction { 127 | position: absolute; 128 | z-index: 99; 129 | border-top: 1px solid black; 130 | border-bottom: 1px solid black; 131 | border-right: 1px solid black; 132 | width: 1em; 133 | } 134 | 135 | #index, #events, #personlist, #placelist, #datelist { 136 | position: relative; 137 | top: 130px; 138 | left: 0; 139 | margin: 0 0 1em 0; 140 | padding: 1%; 141 | color: white; 142 | border-top: 1px solid #666; 143 | border-bottom: 1px solid #666; 144 | border-right: 1px solid #666; 145 | border-left: none; 146 | border-top-right-radius: .8em; 147 | border-bottom-right-radius: .8em; 148 | background-color: rgba(255, 255, 255, 0.1); 149 | -webkit-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 150 | -moz-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 151 | box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 152 | overflow: hidden; 153 | width: 97%; 154 | } 155 | 156 | #personlist { 157 | position: relative; 158 | } 159 | 160 | #persongraph { 161 | display: block; 162 | clear: both; 163 | } 164 | 165 | #persongraph circle { 166 | stroke: rgba(255,255,255,0.5); 167 | stroke-width: .2em; 168 | } 169 | 170 | #persongraph circle:hover { 171 | stroke: rgba(255,255,255,0.8); 172 | stroke-width: 1em; 173 | fill: rgba(255,255,255,0.8); 174 | } 175 | 176 | #persongraph #edges line { 177 | stroke: rgba(255,255,255,0.5); 178 | stroke-width: .1em; 179 | } 180 | 181 | #persongraph #edges line:hover { 182 | stroke: rgba(255,255,255,0.8); 183 | } 184 | 185 | #persongraph #vertices text { 186 | cursor: pointer; 187 | text-anchor: middle; 188 | fill: rgba(255,255,255,1); 189 | text-shadow: rgba(255, 255, 255, 1) 1px 1px 5px ; 190 | } 191 | 192 | #persongraph text:hover { 193 | text-decoration: underline; 194 | } 195 | 196 | #meta { 197 | position: absolute; 198 | top: 155px; 199 | left: 63%; 200 | width: 33%; 201 | padding: 1%; 202 | border-top:1px solid #666; 203 | border-right:1px solid #666; 204 | border-bottom:1px solid #666; 205 | border-left:1px solid #666; 206 | border-top-right-radius: 1em; 207 | border-bottom-right-radius: 1em; 208 | background-color: rgba(255, 255, 255, 0.1); 209 | -webkit-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 210 | -moz-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 211 | box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 212 | } 213 | 214 | .docview #meta { 215 | left: 61%; 216 | width: 36%; 217 | z-index: -1; 218 | } 219 | 220 | #actions { 221 | position: fixed; 222 | top: 155px; 223 | left: 86%; 224 | width: 11%; 225 | min-width: 130px; 226 | padding: 1%; 227 | border-top:1px solid #666; 228 | border-right:1px solid #666; 229 | border-bottom:1px solid #666; 230 | border-left: 1px solid transparent; 231 | border-top-right-radius: 1em; 232 | border-bottom-right-radius: 1em; 233 | background-color: rgba(255, 255, 255, 0.2); 234 | -webkit-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 235 | -moz-box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 236 | box-shadow: inset 3px 3px 15px rgba(0, 0, 0, 0.7); 237 | } 238 | 239 | #actions button { 240 | font-size: 1em; 241 | display: block; 242 | border: none; 243 | margin: 1em 0; 244 | padding: 0 0 0 0.3em; 245 | min-width: 90%; 246 | text-align: left; 247 | background-color: transparent; 248 | color: white; 249 | cursor: pointer; 250 | } 251 | 252 | #actions button:hover { text-shadow: rgba(255, 255, 255, 0.9) 0 0 10px ; } 253 | 254 | #actions p { font-size: 0.8em; } 255 | #actions h3 { padding: 0; margin: 0; } 256 | #actions #person { border-left: 1em solid purple; } 257 | #actions #place { border-left: 1em solid green; } 258 | #actions #event { border-left: 1em solid royalblue; } 259 | #actions #date { border-left: 1em solid rgb(200,100,100); } 260 | 261 | #actions ul, 262 | #actions div { 263 | -webkit-transition: .5s ease-in-out; 264 | -moz-transition: .5s ease-in-out; 265 | transition: .5s ease-in-out; 266 | overflow: hidden; 267 | } 268 | 269 | #actions .history { 270 | height: 3em; 271 | } 272 | 273 | #actions .edit { 274 | height: 25em; 275 | } 276 | 277 | 278 | #actions .visible { 279 | height: 0em; 280 | opacity: 0; 281 | } 282 | 283 | #content h1, #content h2, #content h3, #content h4 { padding: 0em 0.5em; } 284 | 285 | #content p, #content li, #struc p, #struc li, #struc div div { 286 | font-size: 1.2em; 287 | padding: 1.0em; 288 | margin: 0.6em; 289 | background-color: transparent; 290 | line-height: 1.5em; 291 | } 292 | #struc pre, #content pre { display: none; } 293 | 294 | #struc h1, #struc h2, #struc h3, #struc h4 { padding: 0em 0.5em;} 295 | 296 | #struc span { color: black; } 297 | #struc span.person { border-bottom: 3px solid purple; } 298 | #struc span.date { border-bottom: 3px solid rgb(200,100,100); } 299 | #struc span.event { border-bottom: 3px solid royalblue; } 300 | #struc span.place { border-bottom: 3px solid green; } 301 | #struc span.undefined { border-bottom: 3px solid red; } 302 | #struc span.person:hover { background-color: purple; color: white; } 303 | #struc span.place:hover { background-color: green; color: white; } 304 | #struc span.date:hover { background-color: rgb(200,100,100); color: white; } 305 | #struc span.event:hover { background-color: royalblue; color: white; } 306 | 307 | #content p:hover { } 308 | #content p:hover .person { border-bottom: 1px solid yellow; } 309 | #content p:hover .place { border-bottom: 1px solid green; } 310 | #content p:hover .event { border-bottom: 2px solid lightblue; } 311 | #content p.person:hover { background-color: yellow; } 312 | #content p.person:before { content:'<'; color: yellow; } 313 | #content p.person:after { content:'>'; color: yellow; } 314 | #content p.place:hover { background-color: green; } 315 | #content p.place:before { content:'<'; color: green; } 316 | #content p.place:after { content:'>'; color: green; } 317 | #content p.event:hover { background-color: lightblue; } 318 | #content p.event:before { content:'<'; color: lightblue; } 319 | #content p.event:after { content:'>'; color: lightblue; } 320 | 321 | 322 | #meta h4, #meta h2, #meta h5 { margin: 0; } 323 | #meta ul { 324 | list-style-type: none; 325 | margin-left: 0px; 326 | padding-left: 10px; 327 | } 328 | 329 | #meta h3.person { color: yellow; } 330 | #meta h3.event { color: lightblue; } 331 | #meta h3.place { color: green; } 332 | 333 | #raw pre { margin: 1em 0 0 1em; line-height: 1.5em; max-width: 600px; overflow: hidden; } 334 | #raw #doc { color: black; } 335 | #raw table { margin: 1em; } 336 | #raw th { color: black; } 337 | #raw #hldoc span { background-color: yellow; color: black; } 338 | #raw #hldoc { color: gray; } 339 | #raw #hldoc span.marked { background-color: #cc0000; color: white; } 340 | 341 | #raw pre::selection, 342 | #raw pre::-moz-selection, 343 | #raw pre::-webkit-selection, 344 | #struc ::selection, 345 | #struc ::-moz-selection, 346 | #struc ::-webkit-selection 347 | { 348 | background:#cc0000; 349 | color:#fff; 350 | } 351 | 352 | #index.doclist .group .doc { 353 | color: rgba(255,255,255,0.5); 354 | background-color: rgba(0,0,0,0.8); 355 | display: block; 356 | float: left; 357 | border: 1px solid rgba(255,255,255,0.5); 358 | border-top-right-radius: .8em; 359 | -webkit-box-shadow: .2em .2em .5em black; 360 | -moz-box-shadow: .2em .2em .5em black; 361 | box-shadow: .2em .2em .5em black; 362 | margin: 0 5px 5px 0px; 363 | padding: .2em; 364 | min-width: 100px; 365 | max-width: 250px; 366 | -webkit-transition: .4s ease-in-out; 367 | -moz-transition: .4s ease-in-out; 368 | transition: .4s ease-in-out; 369 | overflow: hidden; 370 | z-index: -1; 371 | } 372 | 373 | /* 374 | #index.doclist .group .doc:hover { 375 | border: 1px solid rgba(255,255,255,1.0); 376 | -webkit-box-shadow: .5em .5em 1em black; 377 | -moz-box-shadow: .5em .5em 1em black; 378 | box-shadow: .5em .5em 1em black; 379 | margin: 0 20px 5px 0px; 380 | width: 400px; 381 | z-index: 99; 382 | background-color: rgba(0,0,0,1.0); 383 | } 384 | 385 | */ 386 | 387 | #index.doclist .group .doc .meta { 388 | -webkit-transition: .4s ease-in-out; 389 | -moz-transition: .4s ease-in-out; 390 | transition: .4s ease-in-out; 391 | position: relative; 392 | top: 0; 393 | left: 0; 394 | text-align: right; 395 | float: left; 396 | } 397 | #index.doclist .group .doc:hover .meta { 398 | } 399 | 400 | #index.doclist .group .doc .meta span { 401 | display: block; 402 | float: left; 403 | margin: 0 .3em; 404 | padding: .1em; 405 | } 406 | #index.doclist .group .doc .meta .person { 407 | color: purple; 408 | broder-color: purple; 409 | border-left: 0px solid; 410 | } 411 | 412 | #index.doclist .group .doc .meta .event { 413 | clear: left; 414 | color: royalblue; 415 | border-color: royalblue; 416 | border-left: 0px solid; 417 | } 418 | 419 | #index.doclist .group .doc .meta .date { 420 | color: rgb(200,100,100); 421 | border-color: rgb(200,100,100); 422 | border-left: 0px solid; 423 | } 424 | 425 | #index.doclist .group .doc .meta .place{ 426 | color: green; 427 | border-color: green; 428 | border-left: 0px solid; 429 | } 430 | 431 | #index.doclist .group .doc .meta .status{ 432 | border-left: 0px solid white; 433 | } 434 | 435 | 436 | #struc span.person:hover { background-color: purple; color: white; } 437 | #struc span.place:hover { background-color: green; color: white; } 438 | #struc span.date:hover { background-color: rgb(200,100,100); color: white; } 439 | #struc span.event:hover { background-color: royalblue; color: white; } 440 | #index.doclist h3 { 441 | clear: both; 442 | } 443 | 444 | #index h1, #index h2, #index h3 #index h4, 445 | #events h1, #events h2, #events h3, #events h4 { 446 | font-size: 1em; padding: 0; margin: .5em 0; 447 | } 448 | 449 | #events .event h2, #events .event h3, #events .event h4 { 450 | padding: .4em; 451 | margin: 0; 452 | color: #666; 453 | } 454 | #events .clicked h2 { 455 | color: white; 456 | } 457 | 458 | #personlist ul li, #placelist ul li, #datelist ul li { 459 | float: left; 460 | display: block; 461 | margin: 0 0.5em 0.5em 0; 462 | min-width: 200px; 463 | } 464 | #personlist p, #placelist p, #datelist p { 465 | clear: both; 466 | } 467 | 468 | #eventlist { 469 | width: 200%; 470 | height: 470px; 471 | overflow: hidden; 472 | left: 0px; 473 | position: relative; 474 | -webkit-transition: .5s ease-in-out; 475 | -moz-transition: .5s ease-in-out; 476 | transition: .5s ease-in-out; 477 | } 478 | #eventlist.clicked { 479 | left: -20px; 480 | } 481 | 482 | #eventlist-action { 483 | width: 100%; 484 | margin: 1em 0; 485 | text-align: center; 486 | } 487 | 488 | #eventlist-groupby-action button, 489 | #events .event button, 490 | #eventlist-action button { 491 | border: 1px solid #666; 492 | height: 25px; 493 | padding: 0 1em; 494 | background-color: transparent; 495 | color: white; 496 | width:100px; 497 | } 498 | #events .event button { 499 | float: right; 500 | height: 15px; 501 | width: 15px; 502 | border: 1px solid #666; 503 | } 504 | 505 | #eventlist-groupby-action button:hover, 506 | #events .event button:hover { 507 | border: 1px solid white; 508 | } 509 | 510 | 511 | #events .event button span { 512 | display: none; 513 | } 514 | 515 | #eventlist-action button.eventlist-date { 516 | width: auto; 517 | } 518 | 519 | #eventlist-right { 520 | float: right; 521 | border-top-right-radius: .8em; 522 | border-bottom-right-radius: .8em; 523 | } 524 | 525 | #eventlist-left { 526 | float: left; 527 | border-top-left-radius: .8em; 528 | border-bottom-left-radius: .8em; 529 | } 530 | 531 | #events { 532 | height: 75%; 533 | } 534 | 535 | #events .event:hover { 536 | border: 1px solid rgba(65,105,225,0.4); 537 | border-top: 15px solid rgba(65,105,225,0.6); 538 | border-top-left-radius: .4em; 539 | border-top-right-radius: .4em; 540 | } 541 | 542 | #events .event { 543 | position: absolute; 544 | width: 300px; 545 | height: 60px; 546 | z-index: 10; 547 | display: block; 548 | margin: 0; 549 | padding: 0; 550 | overflow: hidden; 551 | -webkit-transition: .5s ease-in-out; 552 | -moz-transition: .5s ease-in-out; 553 | transition: .5s ease-in-out; 554 | border: 1px solid rgba(65,105,225,0.2); 555 | border-top: 15px solid rgba(65,105,225,0.4); 556 | border-top-left-radius: .4em; 557 | border-top-right-radius: .4em; 558 | -webkit-box-shadow: 1px 1px 10px black; 559 | -moz-box-shadow: 1px 1px 10px black; 560 | background-color: rgba(0,0,0,0.2); 561 | } 562 | 563 | #events .clicked:hover, 564 | #events .clicked { 565 | z-index: 999; 566 | height: 400px; 567 | border: 1px solid rgba(65,105,225,0.4); 568 | border-top: 15px solid royalblue; 569 | background-color: rgba(6,6,6,1.0); 570 | -webkit-box-shadow: 10px 10px 15px black; 571 | -moz-box-shadow: 5px 5px 10px black; 572 | } 573 | 574 | #events .event .date { 575 | text-align: center; 576 | padding: .2em 0; 577 | } 578 | #events .event p.summary { 579 | padding: 0 1em; 580 | } 581 | 582 | #events .row { 583 | position: absolute; 584 | width: 100%; 585 | border: 1px solid rgba(0,255,0,0.1); 586 | border-left: 10px solid rgba(0,255,0,0.1); 587 | border-top-left-radius: .4em; 588 | border-bottom-left-radius: .4em; 589 | margin: 1em 0 1.2em 0; 590 | -webkit-box-shadow: inset 1px 1px 10px black; 591 | -moz-box-shadow: inset 10px 10px 15px rgba(0, 0, 0, 0.7); 592 | -webkit-transition: .3s ease-in-out; 593 | -moz-transition: .3s ease-in-out; 594 | } 595 | #events .row:hover { 596 | border: 1px solid rgba(0,255,0,0.5); 597 | border-left: 10px solid rgba(0,255,0,0.5); 598 | } 599 | 600 | #events .row h4 { 601 | font-size: 1em; 602 | font-weight: normal; 603 | padding: 0; margin: .2em 0 .2em .5em; 604 | } 605 | 606 | #events .event ul.personlist { 607 | list-style-type: none; 608 | padding: 0 0 .5em 1em; 609 | } 610 | #events .event ul.personlist li { 611 | margin: 0 0 .5em 0; 612 | } 613 | #events .event ul.personlist h4 { 614 | display: inline; 615 | font-weight: normal; 616 | margin: 0; padding: 0; 617 | } 618 | #events .event ul.personlist p { 619 | display: inline; 620 | } 621 | #events .rowlabel { 622 | position: absolute; 623 | background-color: rgba(0,255,0,0.1); 624 | } 625 | 626 | #events .row svg { 627 | position: absolute; 628 | } 629 | 630 | #events .row svg path { 631 | stroke: rgba(255,255,255,0.2); 632 | stroke-linejoin: round; 633 | stroke-width: 2px; 634 | fill: none; 635 | } 636 | 637 | #events .row:hover svg path { 638 | stroke: white; 639 | } 640 | 641 | #timeline svg { 642 | cursor: pointer; 643 | } 644 | 645 | #timeline svg path { 646 | stroke: rgba(255,255,255,0.2); 647 | stroke-width: 3px; 648 | } 649 | 650 | #timeline svg path:hover { 651 | stroke: white; 652 | } 653 | 654 | #timeline svg text { 655 | display: none; 656 | font-size: 90px; 657 | font-style: normal; 658 | font-family: monospace; 659 | transform: translate(0,50) rotate(0); 660 | fill: red; 661 | } 662 | 663 | 664 | 665 | /* d3 */ 666 | 667 | .node circle { 668 | fill: #fff; 669 | stroke: steelblue; 670 | stroke-width: 1.5px; 671 | } 672 | 673 | .node { 674 | font: 10px sans-serif; 675 | } 676 | 677 | .link { 678 | fill: none; 679 | stroke: #ccc; 680 | stroke-width: 1.5px; 681 | } 682 | 683 | #graphnote { 684 | display: none; 685 | position: absolute; 686 | left: 0px; 687 | top: 0px; 688 | 689 | z-index: 999; 690 | padding: .2em .5em; 691 | border: 1px solid rgba(65,105,225,0.4); 692 | border-top: 15px solid rgba(65,105,225,0.6); 693 | border-top-left-radius: .4em; 694 | border-top-right-radius: .4em; 695 | background-color: rgba(6,6,6,1.0); 696 | -webkit-box-shadow: 10px 10px 15px black; 697 | -moz-box-shadow: 5px 5px 10px black; 698 | -webkit-transition: .5s ease-in-out; 699 | -moz-transition: .5s ease-in-out; 700 | transition: .5s ease-in-out; 701 | } 702 | #graphnote h3 { 703 | padding: 0; margin: 0; 704 | } 705 | 706 | #graphnote:hover { 707 | border: 1px solid royalblue; 708 | border-top: 15px solid royalblue; 709 | } 710 | -------------------------------------------------------------------------------- /static/andoc.js: -------------------------------------------------------------------------------- 1 | var docid = document.URL.split('/')[5]; 2 | var backuphldoc; 3 | var backupstruc; 4 | var curleft = 0; 5 | 6 | $(document).ready(function() { 7 | 8 | $("#timeline svg path").click(function() { 9 | alert("yeah"); 10 | }); 11 | 12 | $("#persongraph text").click(function() { 13 | var gx = this.getClientRects()[0].left; 14 | var gy = this.getClientRects()[0].top; 15 | 16 | $("#graphnote").css("display", "block"); 17 | $("#graphnote").css("color", "white"); 18 | $("#graphnote").css("top", gy + "px"); 19 | $("#graphnote").css("left", gx + "px"); 20 | 21 | }); 22 | 23 | $("#eventlist-left").click(function() { 24 | curleft = curleft + $("#events").width()/2; 25 | if (curleft <= 0) { 26 | $("#eventlist").css("left", curleft + "px"); 27 | } else { 28 | curleft = 0; 29 | } 30 | }); 31 | $("#eventlist-right").click(function() { 32 | curleft = curleft - $("#events").width()/2; 33 | $("#eventlist").css("left", curleft + "px"); 34 | }); 35 | 36 | $("#event-action-show").click(function() { 37 | console.log("xx"); 38 | $("#event-1").toggleClass('clicked'); 39 | }); 40 | 41 | $("#head1").click(function() { sendSelection('h1'); }); 42 | $("#head2").click(function() { sendSelection('h2'); }); 43 | $("#head3").click(function() { sendSelection('h3'); }); 44 | $("#head4").click(function() { sendSelection('h4'); }); 45 | $("#para").click(function() { sendSelection('p'); }); 46 | $("#span").click(function() { sendSelection('span'); }); 47 | $("#div").click(function() { sendSelection('div'); }); 48 | $("#li").click(function() { sendSelection('li'); }); 49 | $("#ul").click(function() { sendSelection('ul'); }); 50 | 51 | $("#hldoc").mousedown(function() { return false; }); 52 | 53 | $("#selections").click(function() { alert( selections ) } ); 54 | 55 | $("#person").click(function() { sendTriple('person'); }); 56 | $("#place").click(function() { sendTriple('place'); }); 57 | $("#event").click(function() { sendTriple('event'); }); 58 | $("#date").click(function() { sendTriple('date'); }); 59 | 60 | $("#action").click(function() { 61 | $("#actions div.edit").toggleClass('visible'); 62 | $("#actions ul.history").toggleClass('visible'); 63 | }); 64 | $("#history").click(function() { 65 | $("#actions div.edit").toggleClass('visible'); 66 | $("#actions ul.history").toggleClass('visible'); 67 | }); 68 | 69 | var hldoc = document.getElementById('hldoc'); 70 | if (hldoc) { 71 | backuphldoc = hldoc.cloneNode(true); 72 | loadTextSelections(docid); 73 | } 74 | 75 | var struc = document.getElementById('struc'); 76 | if (struc) { 77 | backupstruc = struc.cloneNode(true); 78 | loadTriples(docid); 79 | } 80 | 81 | /* 82 | var tree = d3.layout.tree() 83 | .size([0,500]); 84 | 85 | d3.json("/data/event-tree.json", function(json) { 86 | var nodes = tree.nodes(json); 87 | 88 | var row1 = d3.select(".row"); 89 | row1.selectAll("div.event") 90 | .data(nodes) 91 | .enter().append("div") 92 | .attr("class","event") 93 | .attr("style", function(d) { 94 | return "left:" + d.y + "px; "; 95 | }) 96 | .text(function(d){ return d.name; }); 97 | 98 | }); 99 | */ 100 | }); 101 | 102 | function markSelection(node, start, end, selclass) { 103 | console.log("node:" + node); 104 | console.log("start:" + start + " end:" + end); 105 | console.log("selection class: " + selclass); 106 | var span = document.createElement("span"); 107 | span.className = selclass; 108 | var range = document.createRange(); 109 | range.setStart(node.firstChild, start); 110 | range.setEnd(node.firstChild, end); 111 | span.appendChild(document.createTextNode(range.toString())); 112 | range.deleteContents(); 113 | range.insertNode(span); 114 | console.log("marked selection"); 115 | console.log(span); 116 | } 117 | 118 | function drawSelections(selections) { 119 | 120 | /* reset the doc so we can apply the selections */ 121 | var hldoc = document.getElementById('hldoc'); 122 | var hlparent = hldoc.parentNode; 123 | var newhl = backuphldoc.cloneNode(true); 124 | 125 | /* 126 | 127 | var pat1 = /&[A-Z]{2,6};/gi; 128 | var pat2 = /&#[0-9]{1,5};/g; 129 | var matches = Array() 130 | while((result = pat1.exec(backuphldoc.innerHTML)) != null) { 131 | result.index 132 | result[0].toString().length 133 | } 134 | 135 | */ 136 | hlparent.removeChild(hldoc); 137 | hlparent.appendChild(newhl); 138 | console.log('hl reset'); 139 | console.log(newhl); 140 | 141 | 142 | if (newhl !== null) { 143 | for (var i = 0 ; i < selections.length; i++) { 144 | var selection = selections[i]; 145 | console.log(selection.start); 146 | console.log(selection.end); 147 | var selclass = "selection s" + selection.start + "e" + selection.end; 148 | markSelection(newhl, selection.start, selection.end, selclass); 149 | } 150 | } 151 | $("#hldoc").mousedown(function() { return false; }); 152 | $(".selection").click(function() { 153 | $(this).addClass("marked"); 154 | var action_top = $(this).offset().top; 155 | var action_left = ($("#hldoc").offset().left + $("#hldoc").width() + 10); 156 | var action_height = $(this).height(); 157 | $("#hlaction").attr("style", 158 | "top: " + action_top + "px; " + 159 | "left: " + action_left + "px;" + 160 | "height: " + action_height + "px;" 161 | ); 162 | }); 163 | } 164 | 165 | function loadTextSelections(id) { 166 | $.getJSON('/rest/selection/list/' + id, function(data) { 167 | console.log(data); 168 | var selections = []; 169 | $.each(data, function(key, value) { 170 | var text_selection = new Object(); 171 | text_selection.start = value[0]; 172 | text_selection.end = value[1] - 1; 173 | text_selection.ref = value[2]; 174 | selections.push(text_selection); 175 | }); 176 | console.log("loaded selections"); 177 | console.log(selections); 178 | drawSelections(selections); 179 | }); 180 | } 181 | 182 | 183 | function sendSelection(type) { 184 | var selection = window.getSelection(); 185 | var range = selection.getRangeAt(0); 186 | var data0 = { start: range.startOffset, 187 | end: range.endOffset, 188 | ref: "http://www.w3.org/1999/xhtml/#" + type }; 189 | $.ajax({ 190 | type: 'POST', 191 | url: "/rest/selection/add/" + docid, 192 | contentType: "application/json; charset=utf-8", 193 | processData: false, 194 | dataType: "json", 195 | data: JSON.stringify(data0), 196 | success: function(data) { 197 | console.log(data); 198 | loadTextSelections(docid); 199 | } 200 | }); 201 | } 202 | 203 | function sendTriple(pre) { 204 | 205 | var offset = 0; 206 | var selection = window.getSelection(); 207 | var range = selection.getRangeAt(0); 208 | var start = range.startOffset; 209 | var end = range.endOffset; 210 | 211 | if ( selection.baseNode.parentNode.hasChildNodes() ) { 212 | for ( var i = 0 ; selection.baseNode.parentNode.childNodes.length > i ; i++ ) { 213 | var cnode = selection.baseNode.parentNode.childNodes[i]; 214 | if (cnode == selection.baseNode) { 215 | break; 216 | } 217 | if (cnode.nodeType == document.TEXT_NODE) { 218 | console.log(offset, cnode.length); 219 | offset = offset + cnode.length; 220 | } 221 | if (cnode.nodeType == document.ELEMENT_NODE) { 222 | console.log(offset, cnode.textContent.length); 223 | offset = offset + cnode.textContent.length; 224 | } 225 | } 226 | } 227 | 228 | start = start + offset; 229 | end = end + offset; 230 | 231 | var trSubject = getNodePath(selection.baseNode.parentNode); 232 | var trPredicate = pre; 233 | var trObject = range.toString(); 234 | var triple = { s: trSubject, p: trPredicate, o: trObject, start: start, end: end }; 235 | console.log("sending triple"); 236 | console.log(triple); 237 | $.ajax({ 238 | type: 'POST', 239 | url: "/rest/triple/add/" + docid, 240 | contentType: "application/json; charset=utf-8", 241 | processData: false, 242 | dataType: "text", 243 | data: JSON.stringify(triple), 244 | success: function(data) { 245 | loadTriples(docid); 246 | } 247 | }); 248 | } 249 | 250 | function loadTriples(id) { 251 | $.getJSON('/rest/triple/list/' + id, function(data) { 252 | var triples = []; 253 | $.each(data, function(key, value) { 254 | var triple = new Object(); 255 | triple.s = value[0]; 256 | triple.start = value[1]; 257 | triple.end = value[2]; 258 | triple.p = value[3]; 259 | triple.o = value[4]; 260 | triples.push(triple); 261 | }); 262 | drawTriples(triples); 263 | }); 264 | } 265 | 266 | function drawTriples(triples) { 267 | var struc = document.getElementById('struc'); 268 | var strucparent = struc.parentNode; 269 | var newstruc = backupstruc.cloneNode(true); 270 | strucparent.removeChild(struc); 271 | strucparent.appendChild(newstruc); 272 | console.log('reset'); 273 | if (newstruc !== null) { 274 | for (var i = 0; i < triples.length; i++) { 275 | var triple = triples[i]; 276 | var n = getNodeFromPath(triple.s); 277 | var triclass = "triple t" + triple.start + "e" + triple.end + " " + triple.p; 278 | markSelection(n, triple.start, triple.end, triclass); 279 | } 280 | } 281 | } 282 | 283 | function getNodePath(el) { 284 | var path = []; 285 | do { 286 | if (el.className.length > 0) { 287 | path.unshift(el.nodeName.toLowerCase() + '.' + el.className); 288 | } 289 | } while ((el.id != 'struc') && (el = el.parentNode)); 290 | 291 | return document.URL + '#' + path.join("/"); 292 | } 293 | 294 | function getNodeFromPath(path) { 295 | console.log(path); 296 | var className = path.split('#')[1].split('.')[1].split('/')[0]; 297 | console.log(className); 298 | 299 | return document.getElementsByClassName(className)[0]; 300 | } 301 | 302 | -------------------------------------------------------------------------------- /static/back.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/endpnt/andoc/e4cba7995865ca7a49e6a573518f1de7d88dbe3f/static/back.png -------------------------------------------------------------------------------- /static/d3.layout.min.js: -------------------------------------------------------------------------------- 1 | (function(){function bh(a,b,c){return a._tree.ancestor.parent==b.parent?a._tree.ancestor:c}function bg(a,b,c){a=a._tree,b=b._tree;var d=c/(b.number-a.number);a.change+=d,b.change-=d,b.shift+=c,b.prelim+=c,b.mod+=c}function bf(a){var b=0,c=0,d=a.children,e=d.length,f;while(--e>=0)f=d[e]._tree,f.prelim+=b,f.mod+=b,b+=f.shift+(c+=f.change)}function be(a,b){function c(a,d){var e=a.children;if(e){var f,g=null,h=-1,i=e.length;while(++h0&&(a=d)}return a}function _(a){return a.children?a.children[a.children.length-1]:a._tree.thread}function $(a){return a.children?a.children[0]:a._tree.thread}function Z(a,b){return a.parent==b.parent?1:2}function Y(a){var b=a.children;return b?Y(b[b.length-1]):a}function X(a){var b=a.children;return b?X(b[0]):a}function W(a){return a.reduce(function(a,b){return a+b.x},0)/a.length}function V(a){return 1+d3.max(a,function(a){return a.y})}function U(a,b,c){var d=b.r+c.r,e=a.r+c.r,f=b.x-a.x,g=b.y-a.y,h=Math.sqrt(f*f+g*g),i=(e*e+h*h-d*d)/(2*e*h),j=Math.acos(i),k=i*e,l=Math.sin(j)*e;f/=h,g/=h,c.x=a.x+k*f+l*g,c.y=a.y+k*g-l*f}function T(a,b,c,d){var e=a.children;a.x=b+=d*a.x,a.y=c+=d*a.y,a.r*=d;if(e){var f=-1,g=e.length;while(++f1){h=a[1],h.x=h.r,h.y=0,l(h);if(f>2){i=a[2],U(g,h,i),l(i),M(g,i),g._pack_prev=i,M(i,h),h=g._pack_next;for(var m=3;m0?(N(g,j),h=j,m--):(N(j,h),g=j,m--)}}}var q=(b+c)/2,r=(d+e)/2,s=0;for(var m=0;m.001}function N(a,b){a._pack_next=b,b._pack_prev=a}function M(a,b){var c=a._pack_next;a._pack_next=b,b._pack_prev=a,b._pack_next=c,c._pack_prev=b}function L(a,b){return a.value-b.value}function J(a){return d3.merge(a.map(function(a){return(a.children||[]).map(function(b){return{source:a,target:b}})}))}function I(a,b){return b.value-a.value}function H(a){return a.value}function G(a){return a.children}function F(a,b){a.sort=d3.rebind(a,b.sort),a.children=d3.rebind(a,b.children),a.links=J,a.value=d3.rebind(a,b.value),a.nodes=function(b){K=!0;return(a.nodes=a)(b)};return a}function E(a){return[d3.min(a),d3.max(a)]}function D(a,b){var c=-1,d=+a[0],e=(a[1]-d)/b,f=[];while(++c<=b)f[c]=e*c+d;return f}function C(a,b){return D(a,Math.ceil(Math.log(b.length)/Math.LN2+1))}function B(a,b){return a+b[1]}function A(a){return a.reduce(B,0)}function z(a){var b=1,c=0,d=a[0][1],e,f=a.length;for(;bd&&(c=b,d=e);return c}function w(a,b,c){a.y0=b,a.y=c}function v(a){return a.y}function u(a){return a.x}function t(a){return 1}function s(a){return 20}function r(a){var b=0,c=0;a.count=0,a.leaf||a.nodes.forEach(function(d){r(d),a.count+=d.count,b+=d.count*d.cx,c+=d.count*d.cy}),a.point&&(a.leaf||(a.point.x+=Math.random()-.5,a.point.y+=Math.random()-.5),a.count++,b+=a.point.x,c+=a.point.y),a.cx=b/a.count,a.cy=c/a.count}function q(){d3.event.stopPropagation(),d3.event.preventDefault()}function p(){i&&(q(),i=!1)}function o(){!f||(g&&(i=!0,q()),d3.event.type==="mouseup"&&n(),f.fixed=!1,e=h=f=j=null)}function n(){if(!!f){var a=j.parentNode;if(!a){f.fixed=!1,h=f=j=null;return}var b=m(a);g=!0,f.px=b[0]-h[0],f.py=b[1]-h[1],q(),e.resume()}}function m(a){return d3.event.touches?d3.svg.touches(a)[0]:d3.svg.mouse(a)}function l(a){a!==f&&(a.fixed=!1)}function k(a){a.fixed=!0}function c(a,c){if(a===c)return a;var d=b(a),e=b(c),f=d.pop(),g=e.pop(),h=null;while(f===g)h=f,f=d.pop(),g=e.pop();return h}function b(a){var b=[],c=a.parent;while(c!=null)b.push(a),a=c,c=c.parent;b.push(a);return b}function a(a){var b=a.source,d=a.target,e=c(b,d),f=[b];while(b!==e)b=b.parent,f.push(b);var g=f.length;while(d!==e)f.splice(g,0,d),d=d.parent;return f}d3.layout={},d3.layout.bundle=function(){return function(b){var c=[],d=-1,e=b.length;while(++de&&(e=h),d.push(h)}for(g=0;g=i[0]&&o<=i[1]&&(k=g[d3.bisect(j,o,1,m)-1],k.y+=n,k.push(e[f]));return g}var a=!0,b=Number,c=E,d=C;e.value=function(a){if(!arguments.length)return b;b=a;return e},e.range=function(a){if(!arguments.length)return c;c=d3.functor(a);return e},e.bins=function(a){if(!arguments.length)return d;d=typeof a=="number"?function(b){return D(b,a)}:d3.functor(a);return e},e.frequency=function(b){if(!arguments.length)return a;a=!!b;return e};return e},d3.layout.hierarchy=function(){function g(a){var b=[];e(a,0,b);return b}function f(a,b){var d=a.children,e=0;if(d){var h=-1,i=d.length,j=b+1;while(++h0&&(bg(bh(g,a,d),a,m),i+=m,j+=m),k+=g._tree.mod,i+=e._tree.mod,l+=h._tree.mod,j+=f._tree.mod;g&&!_(f)&&(f._tree.thread=g,f._tree.mod+=k-j),e&&!$(h)&&(h._tree.thread=e,h._tree.mod+=i-l,d=a)}return d}function i(a,b){a.x=a._tree.prelim+b;var c=a.children;if(c){var d=-1,e=c.length;b+=a._tree.mod;while(++dd.dy)j=d.dy;while(++fd.dx)j=d.dx;while(++fe&&(e=d);c*=c,b*=b;return Math.max(b*e*f/c,c/(b*g*f))}function i(a){if(!!a.children){var b={x:a.x,y:a.y,dx:a.dx,dy:a.dy},c=a.children.slice(),d,e=[];e.area=0;while(d=c.pop())e.push(d),e.area+=d.area,d.z!=null&&(k(e,d.z?b.dx:b.dy,b,!c.length),e.length=e.area=0);a.children.forEach(i)}}function h(a){if(!!a.children){var b={x:a.x,y:a.y,dx:a.dx,dy:a.dy},c=[],d=a.children.slice(),e,f=Infinity,g,i=Math.min(b.dx,b.dy),l;c.area=0;while((l=d.length)>0)c.push(e=d[l-1]),c.area+=e.area,(g=j(c,i))<=f?(d.pop(),f=g):(c.area-=c.pop().area,k(c,i,b,!1),i=Math.min(b.dx,b.dy),c.length=c.area=0,f=Infinity);c.length&&(k(c,i,b,!0),c.length=c.area=0),a.children.forEach(h)}}function g(a,b){var c=a.children;a.area=a.value*b;if(c){var d=-1,e=c.length;while(++d 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 |
15 | 23 |
24 | 30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% block head %} 4 | 5 | {{ title }} 6 | 7 | 8 | 9 | 10 | 11 | {% endblock %} 12 | 13 | {% block wrapper %} 14 |
15 | {% block explore %} 16 |
17 | 25 |
26 | {% endblock %} 27 | {% block menu %} 28 | 35 | {% endblock %} 36 | {% block search %} 37 | 43 | {% endblock %} 44 |
45 | {% endblock %} 46 | {% block content %} 47 | {% endblock %} 48 | 49 | 50 | -------------------------------------------------------------------------------- /templates/date.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% block menu %}{% endblock %} 3 | {% block explore %} 4 |
5 | 13 |
14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /templates/date/list.html: -------------------------------------------------------------------------------- 1 | {% extends 'date.html' %} 2 | {% block content %} 3 |
4 |

TODO: put fancy timeline here

5 | 10 |
11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /templates/default.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menu %}{% endblock %} 3 | {% block content %} 4 |
5 |

Welcome

6 |

Hello World

7 |

Documents

8 |
9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /templates/doc.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% block explore %} 3 |
4 | 12 |
13 | {% endblock %} 14 | {% block menu %} 15 | 25 | {% endblock %} 26 | -------------------------------------------------------------------------------- /templates/doc/list.html: -------------------------------------------------------------------------------- 1 | {% extends 'doc.html' %} 2 | {% block content %} 3 |
4 |

G1

5 |
6 | {% for doc in documents %} 7 |
8 |
9 | 10 | #{{ doc.id }} 11 | 12 | 0% 13 | 15 | {{ doc.event_count|int }} 16 | 17 | 19 | {{ doc.date_count|int }} 20 | 21 | 23 | {{ doc.person_count|int }} 24 | 25 | 27 | {{ doc.place_count|int }} 28 | 29 |
30 |
31 | {% endfor %} 32 |
33 |
34 | {% endblock %} 35 | -------------------------------------------------------------------------------- /templates/doc/raw.html: -------------------------------------------------------------------------------- 1 | {% extends 'doc.html' %} 2 | {% block menu %} 3 | 10 | {% endblock %} 11 | {% block content %} 12 |
13 |

Actions

14 |
15 |

Select area of the orginal document and apply structure information

16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 |

History

28 |
    29 |
  • rev. 1
  • 30 |
  • rev. 2
  • 31 |
32 |
33 |
34 |
35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 46 | 47 | 48 | 49 | 50 |
OriginalMarked
{{ doc.content|replace('<','[')|replace('>',']') }}
{{ doc.content|replace('<','[')|replace('>',']') }}
51 |
52 | {% endblock %} 53 | -------------------------------------------------------------------------------- /templates/doc/struc.html: -------------------------------------------------------------------------------- 1 | {% extends 'doc.html' %} 2 | {% block menu %} 3 | 10 | {% endblock %} 11 | {% block content %} 12 |
13 | {{ struc }} 14 |
15 |
16 |

Actions

17 |

Select element or area and apply semantic information

18 | 19 | 20 | 21 | 22 |

History

23 |
    24 |
25 |
26 | {% endblock %} 27 | -------------------------------------------------------------------------------- /templates/doc/view.html: -------------------------------------------------------------------------------- 1 | {% extends 'doc.html' %} 2 | {% block menu %} 3 | 10 | {% endblock %} 11 | {% block content %} 12 |
{{ content }}
13 |
{{ meta }}
14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /templates/event.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% block menu %}{% endblock %} 3 | {% block explore %} 4 |
5 | 13 |
14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /templates/event/list.html: -------------------------------------------------------------------------------- 1 | {% extends 'event.html' %} 2 | {% block content %} 3 |
4 |
5 |
6 | Group by 7 | 8 | 9 | 10 |
11 |
12 | 14 | Mars 16 | 17 | 18 |
19 |
1. Jan 2011
20 | 21 |

Main Event Title 1

22 |

Summary: Vestibulum et metus tortor, nec congue magna. Maecenas vestibulum, tortor ut euismod mattis, diam metus aliquam lorem, a tristique orci tellus ut turpis.

23 |

Persons

24 |
    25 |
  • 26 |

    Person 1

    27 |

    Description for Person 1. Vestibulum et metus tortor, nec congue magna.

    28 |
  • 29 |
  • 30 |

    Person 2

    31 |

    Description for Person 2. Vestibulum et metus tortor, nec congue magna.

    32 |
  • 33 |
34 |
35 | 36 | 37 | 38 |
39 |
4. Jan 2011
40 |

Event Title 1

41 |

Test

42 |
43 | 44 | 45 | 46 |
47 |
15. Jan 2011
48 |

Event Title 1

49 |

Bla

50 |
51 |
52 |
53 | 55 | Planet Earth 57 | 58 | 60 | 61 | 62 | 63 | 64 |
65 |
1. Jan 2011
66 |

Event Title 1

67 |

Text

68 |
69 | 70 | 71 | 72 |
73 |
7. Jan 2011
74 |

Event Title 2

75 |

Text

76 |
77 | 78 | 79 | 80 |
81 |
14. Jan 2011
82 |

Event Title 3

83 |

Text

84 |
85 | 86 | 87 | 88 |
89 |
21. Jan 2011
90 |

Event Title 4

91 |

Text

92 |
93 | 94 |
95 |
4. Jan 2011
96 |

Event Title 5

97 |

Text

98 |
99 | 100 | 101 | 102 | 103 | 104 | 105 |
106 |
10. Jan 2011
107 |

Event Title 6

108 |

Text

109 |
110 | 111 |
112 |
12. Jan 2011
113 |

Event Title 7

114 |

Text

115 |
116 |
117 |
118 |
119 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 |
159 |
160 | 161 | 164 | 167 | 170 | 173 | 176 | 179 | 182 | 185 | 188 | 191 | 192 |
193 |
194 | {% endblock %} 195 | -------------------------------------------------------------------------------- /templates/person.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% block explore %} 3 |
4 | 12 |
13 | {% endblock %} 14 | -------------------------------------------------------------------------------- /templates/person/graph.html: -------------------------------------------------------------------------------- 1 | {% extends 'person.html' %} 2 | {% block menu %}{% endblock%} 3 | {% block content %} 4 |
5 | 6 | 7 | 8 | 9 | {% for e in edges %} 10 | 12 | {% endfor %} 13 | 14 | 15 | {% for v in vertices %} 16 | 17 | 18 | {{ v.label }} 19 | 20 | {% endfor %} 21 | 22 | 23 | 24 |
25 | {% endblock %} 26 | -------------------------------------------------------------------------------- /templates/person/list.html: -------------------------------------------------------------------------------- 1 | {% extends 'person.html' %} 2 | {% block menu %}{% endblock%} 3 | {% block content %} 4 |

Person A

Description

5 |
6 | 11 |

View as Graph

12 |
13 | {% endblock%} 14 | -------------------------------------------------------------------------------- /templates/place.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% block menu %}{% endblock %} 3 | {% block explore %} 4 |
5 | 13 |
14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /templates/place/list.html: -------------------------------------------------------------------------------- 1 | {% extends 'place.html' %} 2 | {% block content %} 3 |
4 |

TODO: put fancy map here

5 | 10 |
11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /templates/search/result.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% block menu %}{% endblock %} 3 | {% block content %} 4 |
5 | {{ result }} 6 |
7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /triple.py: -------------------------------------------------------------------------------- 1 | from hashlib import sha1 2 | from rediskeys import * 3 | 4 | class Triple(object): 5 | def __init__(self, subject = None, pre = None, object = None): 6 | self.subject = subject 7 | self.pre = pre 8 | self.object = object 9 | 10 | def _valid(self): 11 | #TODO 12 | return True 13 | 14 | def save(self, redis): 15 | if self._valid(): 16 | # create a hash and uniqid for the object string 17 | obj_hash = sha1(self.object).hexdigest() 18 | print obj_hash 19 | obj_id = redis.get(OBJ_ID % (self.pre, obj_hash)) 20 | if obj_id is None: 21 | obj_id = redis.incr(NEXT_OBJ_ID % self.pre) 22 | if redis.setnx(OBJ_ID % (self.pre,obj_hash), obj_id) == 0: 23 | obj_id = redis.get(OBJ_ID % (self.pre, obj_hash)) 24 | 25 | pipe = redis.pipeline() 26 | # store the object by predicate and object id 27 | pipe.set(OBJECT_VALUE % (self.pre, obj_id), self.object) 28 | 29 | # keep a list of all subjects for this predicate 30 | pipe.sadd(PRE_SUBJECTS % self.pre, self.subject) 31 | 32 | # keep a sorted set of all object ids for this predicate 33 | # also add score to count how often each object 34 | # is mentioned in all documents 35 | pipe.zincrby(PRE_OBJECTS % self.pre, obj_id, 1) 36 | 37 | # save the triple as redis hash if not exist 38 | pipe.hsetnx(self.subject, self.pre, obj_id) 39 | 40 | val_res, pre_res, obj_cnt, trip_res = pipe.execute() 41 | # TODO error check 42 | 43 | return True 44 | else: 45 | return False 46 | 47 | def load(self, redis, subject): 48 | self.subject = subject 49 | self.pre, self.object = redis.hgetall(subject) 50 | 51 | class Triples(object): 52 | def __init__(self, redis): 53 | self._redis = redis 54 | 55 | def from_subject(self, subject): 56 | result = [] 57 | r = self._redis.hgetall(subject) 58 | if r == 0: 59 | return result 60 | for pre, obj_id in r.iteritems(): 61 | object = self._redis.get(OBJECT_VALUE % (pre, obj_id)) 62 | t = Triple(subject, pre, object) 63 | result.append(t) 64 | return result 65 | 66 | def from_predicate(self, pre): 67 | result = [] 68 | subjects = self._redis.smembers(PRE_SUBJECTS % pre) 69 | if subjects == 0: 70 | return result 71 | 72 | for subject in subjects: 73 | t = self.from_subject(subject) 74 | result.extend(t) 75 | return result 76 | 77 | def from_document_id(self, id): 78 | return [] 79 | 80 | --------------------------------------------------------------------------------