├── .gitignore
├── .project
├── .pydevproject
├── .settings
│   ├── org.eclipse.core.resources.prefs
│   └── org.eclipse.ltk.core.refactoring.prefs
├── 2014goals-features-scope-discussion.txt
├── 20150105-flashgrab-github-question.txt
├── CopyFilesToAddons.bat
├── CopyFilesToAddons.sh
├── ExcludedFilesLinux.txt
├── ExcludedFilesWindows.txt
├── How-to-limit-to-a-subset-of-the-lexicon.md
├── LICENSE
├── README-manual-build.txt
├── README.md
├── User 1
│   └── collection.media
│       └── blank.txt
├── __init__.py
├── anki notes suggestions.txt
├── flashgrab-v0.8.7.zip
├── icons
│   ├── flashgrid-flashgrab-icons.png
│   ├── flashgrid-flashgrab-icons.svg
│   └── readme.txt
├── readme-developers.txt
├── sample-src
│   ├── Indonesian Vocab 2015-01-30 1445 tagged-in-notes.fwbackup
│   ├── Indonesian Vocab 2015-02-04 2149 updated media.fwbackup
│   └── readme-indo-sample.txt
├── syncx_local.py
├── syncxml
│   ├── README.md
│   ├── SyncFromXML.py
│   ├── SyncFromXML_config_default.txt
│   ├── SyncFromXML_help.pdf
│   ├── SyncFromXML_log.txt
│   ├── SyncFromXML_readme.txt
│   ├── __init__.py
│   ├── anki_util.py
│   ├── docsrc
│   │   ├── SyncFromXML_help.odt
│   │   ├── checklog.png
│   │   ├── customtype.png
│   │   ├── customtype_sm.png
│   │   ├── flex_export.png
│   │   ├── menu.png
│   │   ├── report.png
│   │   ├── sync-report.png
│   │   ├── wizard-done.png
│   │   ├── wizard-examples.png
│   │   ├── wizard1.png
│   │   └── wizard2.png
│   ├── file_util.py
│   ├── flex_util.py
│   ├── glob.py
│   ├── local_launch.py
│   ├── logg.py
│   ├── samples
│   │   ├── Lindu-Test.lift
│   │   ├── audio
│   │   │   ├── 634710572710000000nagali.wav
│   │   │   └── 634710573956406250A LexExampleSentence.wav
│   │   ├── indonesian-lift-dictionary.apkg
│   │   ├── lift-dictionary.apkg
│   │   └── pictures
│   │       └── bersih.gif
│   ├── syncxml.py
│   └── xml_util.py
├── tests
│   └── Moma
│       ├── Moma.WeSayConfig
│       ├── Moma.lift
│       ├── audio
│       │   ├── adat-63531587437330.wav
│       │   ├── loka-63531587541460.wav
│       │   ├── loka-63531587545733.wav
│       │   ├── mata-63531587590792.wav
│       │   └── mata-63531587597879.wav
│       └── pictures
│           ├── cultural law.png
│           └── sdd.png
├── this-is-an-eclipse-pydev-project.txt
├── xml
│   ├── __init__.py
│   ├── dom
│   │   ├── NodeFilter.py
│   │   ├── __init__.py
│   │   ├── domreg.py
│   │   ├── expatbuilder.py
│   │   ├── minicompat.py
│   │   ├── minidom.py
│   │   ├── pulldom.py
│   │   └── xmlbuilder.py
│   ├── etree
│   │   ├── ElementInclude.py
│   │   ├── ElementPath.py
│   │   ├── ElementTree.py
│   │   ├── __init__.py
│   │   └── cElementTree.py
│   ├── parsers
│   │   ├── __init__.py
│   │   └── expat.py
│   └── sax
│       ├── __init__.py
│       ├── _exceptions.py
│       ├── expatreader.py
│       ├── handler.py
│       ├── saxutils.py
│       └── xmlreader.py
└── xpath
    ├── __init__.py
    ├── exceptions.py
    ├── expr.py
    ├── parser.g
    ├── parser.py
    ├── readme.txt
    └── yappsrt.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | flashgrab 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /${PROJECT_DIR_NAME} 5 | 6 | python 2.7 7 | Default 8 | 9 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | #Wed Jan 29 10:56:06 CST 2014 2 | eclipse.preferences.version=1 3 | encoding/=UTF-8 4 | -------------------------------------------------------------------------------- /.settings/org.eclipse.ltk.core.refactoring.prefs: -------------------------------------------------------------------------------- 1 | #Thu Aug 30 15:34:08 SGT 2012 2 | eclipse.preferences.version=1 3 | org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false 4 | -------------------------------------------------------------------------------- /2014goals-features-scope-discussion.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/2014goals-features-scope-discussion.txt -------------------------------------------------------------------------------- /20150105-flashgrab-github-question.txt: -------------------------------------------------------------------------------- 1 | My wiki's connection to my repository appears to be broken. 2 | 3 | I still have these files in the repo but they no longer update the wiki: 4 | Home.md 5 | How-to-limit-to-a-subset-of-the-lexicon.md 6 | README.md 7 | 8 | Likewise, when I added a new wiki page (How-to-run-and-debug-Anki-in-a-Linux-virtual-machine-or-in-Windows) it didn't show up in the repo. 9 | 10 | I'm not sure if this was a recent GitHub change, or whether it's because the project was recently forked to: 11 | https://github.com/sillsdev/flashgrab 12 | 13 | (I've deleted the original repo: https://github.com/pconstrictor/lexml2anki) 14 | 15 | I see that I can clone the wiki as a separate repository--is that a new requirement? If so, I guess I should just delete those three files from the main repo? 16 | 17 | -Jon 18 | -------------------------------------------------------------------------------- /CopyFilesToAddons.bat: -------------------------------------------------------------------------------- 1 | ECHO OFF 2 | REM A Windows 'build' script. Run this after editing the addon's files. Then restart Anki. 3 | REM Don't make edits over there in addons directly, as VCS won't see them, and an Anki or addon upgrade 4 | REM might destroy them. 
5 | ECHO ON 6 | 7 | REM copy all modified (D) files and folders (even E, empty ones), and overwrite (Y) without prompting 8 | REM (Like Anki's own deployment, this does NOT remove any files. The simplest way to do so manually here is to delete the addons folder, then re-run.) 9 | 10 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt xml\*.* %USERPROFILE%\documents\Anki\addons\xml\ 11 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt xpath\*.* %USERPROFILE%\documents\Anki\addons\xpath\ 12 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt distutils\*.* %USERPROFILE%\documents\Anki\addons\distutils\ 13 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt syncxml\*.* %USERPROFILE%\documents\Anki\addons\syncxml\ 14 | xcopy /D /Y syncx.py %USERPROFILE%\documents\Anki\addons\ 15 | 16 | PAUSE 17 | -------------------------------------------------------------------------------- /CopyFilesToAddons.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # To run this script from the terminal command line: ./CopyFilesToAddons.sh 4 | # A Linux 'build' script. Run this after editing the addon's files. Then restart Anki. 5 | # Don't make edits over there in addons directly, as VCS won't see them, and an Anki or addon upgrade might destroy them. 6 | 7 | # Copy recursively (-r) all modified files and folders, except the excluded ones 8 | # (Like Anki's own deployment, this does NOT remove any files. The simplest way to do so manually here is to delete the addons folder, then re-run.) 9 | 10 | mkdir -p ~/Anki/addons 11 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt xml ~/Anki/addons/ 12 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt xpath ~/Anki/addons/ 13 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt distutils ~/Anki/addons/ 14 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt syncxml ~/Anki/addons/ 15 | cp -r -u -v syncx.py ~/Anki/addons/ 16 | -------------------------------------------------------------------------------- /ExcludedFilesLinux.txt: -------------------------------------------------------------------------------- 1 | - *.pyc 2 | - local_launch.py 3 | - syncx_local.py 4 | - SyncFromXML_config.txt 5 | - SyncFromXML_log.txt 6 | - SyncFromXML_configB* -------------------------------------------------------------------------------- /ExcludedFilesWindows.txt: -------------------------------------------------------------------------------- 1 | .pyc\ 2 | \local_launch.py\ 3 | \syncx_local.py\ 4 | \SyncFromXML_config.txt\ 5 | \SyncFromXML_log.txt\ 6 | \SyncFromXML_configB -------------------------------------------------------------------------------- /How-to-limit-to-a-subset-of-the-lexicon.md: -------------------------------------------------------------------------------- 1 | How to export a subset of entries from FLEx to LIFT for use in Anki, 2 | or to export all and then limit in Anki. 3 | 4 | How to use only a subset of entries from FLEx in Anki 5 | 6 | (A) Type a code that you can filter on (e.g. "DIFC01") into an entry-level field in FLEx. Then, every time you want to sync your data to Anki, you would filter on this and export the filtered lexicon to LIFT. The best approach would be to add a custom list and list field (e.g. "Difficulty") to the entry level and use it to assign difficulty levels to each vocabulary item. You could then export just level 1 to Anki at first, then gradually add and export higher difficulty words over time. 
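(As an aside: if you later decide to keep all difficulty levels loaded in Anki at once, see option a2 below, you can suspend the levels you are not studying yet from within Anki itself. Here is a rough sketch of what that could look like as a tiny addon snippet. It assumes a note field named "Difficulty" holding codes like DIFC01/DIFC02, and it uses Anki's Python API as of the Anki 2.0 era this addon targets, so treat it as illustrative only.)

    # Illustrative only: suspend every card whose (hypothetical) Difficulty field
    # holds a level you are not ready for yet. Run from inside Anki.
    from aqt import mw

    def suspend_level(code):
        ids = mw.col.findCards("Difficulty:{}".format(code))  # field-specific search
        mw.col.sched.suspendCards(ids)
        mw.reset()  # refresh the UI so the change is visible

    # e.g. suspend_level("DIFC02"); suspend_level("DIFC03")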
7 | 8 | (a1 - faster sync) A filtered export like this runs faster and keeps all your editing in FLEx. But it requires that you apply this same filter each time you export. 9 | 10 | (a2 - less work for the human) export the whole lexicon every time, including that field. In Anki, enable/disable cards based on the field value. (The next sync will *not* blow away Anki's own info such as this disabling.) 11 | 12 | Here is some help on filtering and suspending in Anki: 13 | http://ankisrs.net/docs/dev/manual.html#searching 14 | http://ankisrs.net/docs/dev/manual.html#editmore 15 | 16 | There's also a new "filtered deck" feature that might provide exactly this functionality, but I have not yet tried it out: 17 | http://ankisrs.net/docs/dev/manual.html#filtered 18 | 19 | .. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Free and open source, under Anki's license. 2 | Created 2011-2014 by SIL International (coded by Jonathan Coombs) 3 | -------------------------------------------------------------------------------- /README-manual-build.txt: -------------------------------------------------------------------------------- 1 | To 'build' this addon, first make sure that the version number in syncxml.py is correct. Then, make a zip file containing these folders and this file: 2 | syncxml/ 3 | distutils/ 4 | xml/ 5 | xpath/ 6 | syncx.py 7 | 8 | Then, log in and upload the zip file to this addon's page: 9 | https://ankiweb.net/shared/info/915685712 10 | 11 | Optionally, first make sure that the files here are up to date: 12 | syncxml/samples 13 | 14 | That is, if you've edited the FLEx project, do this: 15 | - Re-export that (with media) to LIFT 16 | - If necessary, delete the Anki project, and Note Types DICT_LIFT/DICT_LIFT_EX, and reload this afresh: 17 | syncxml/samples/lift-dictionary.apkg 18 | - Sync the LIFT/media into that fresh Anki deck, then re-create an updated .apkg file. 19 | At this point you could also re-share the .apkg file on its own, here: 20 | https://ankiweb.net/shared/decks/ 21 | 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FlashGrab (a.k.a. lexml2anki, SyncFromXml, syncxml) 2 | ========== 3 | 4 | Please see https://github.com/sillsdev/flashgrab/wiki for an overview. 5 | 6 | The "Sync one-way from XML" addon for Anki pulls data into Anki from one or more external XML sources, as 7 | determined by a config file. It does not modify fields that are specific to Anki (e.g. it does not reset any existing 8 | flashcards), but it WILL OVERWRITE any edits that you've made within Anki to the data that was pulled in. The 9 | assumption is that you want to do your editing elsewhere and simply pull the latest data into Anki. 10 | 11 | Note: if you'll be importing LIFT data, the first thing the auto-config does is import the 12 | .apkg file. (It is located in the samples folder next to this readme file; that is, in 13 | %userprofile%\My Documents\Anki\addons\syncxml\samples .) This will put the needed structures in place so that 14 | there are enough fields in the Anki deck to receive several different dictionary fields. 15 | 16 | After that, the wizard will try to auto-configure itself for your LIFT data. 17 | 18 | The rest of this readme file describes the details of the config file. 
For general documentation, see 19 | SyncFromXML_help.pdf instead. (You may find also find the log file useful for troubleshooting during the initial 20 | configuration.) 21 | 22 | CONFIG DETAILS: 23 | 24 | The configuration (SyncFromXML_config.txt) file tells the addon: 25 | - where to find the XML file to pull its data from, 26 | - where precisely the data is located within that source file, and 27 | - where to store it in Anki. 28 | 29 | Before running the addon, try editing the config file by specifying the source file paths and carefully replacing the writing system codes between the " or {} marks. Also make sure that the correct model / note type (e.g. DICT_LIFT) is available in Anki (the easiest way to do this is to import the preconfigured sample deck), and make sure that 'deckname' refers to the Anki deck you wish to copy your data into. This should be enough to start pulling data in from your LIFT file. 30 | 31 | 32 | CONFIGURATION OPTIONS: 33 | 34 | source_file, source, source_field: To start pulling data into Anki, the source_file path needs to point to an XML file, and there needs to be a valid 'source' element containing at least one valid 'source_field' element. You can supply multiple source files if you wish, by specifying multiple 'source' elements. (And a single source file may be used more than once.) 35 | 36 | To load from the addon's 'samples' subfolder, just a short relative path will suffice for each path. Example: 37 | 38 | source_file="samples/Lindu-Test.lift" 39 | source_audio_folder="samples/audio" 40 | source_image_folder="samples/pictures" > 41 | 42 | But assuming your own source file is located elsewhere, you'll want to specify absolute paths. Example: 43 | 44 | source_file="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\Lindu-Test.lift" 45 | source_audio_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\audio" 46 | source_image_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\pictures" > 47 | 48 | audio_folder, image_folder: The base paths for finding any media files whose links are not already stored as absolute paths in the XML file. 49 | 50 | base_xpath: The base_xpath for each source matches on the root element that will correspond to one Anki note, and all source_field elements must be relative to that base path, representing the fields that will be copied over into that note. For example, given a LIFT file as input, the element would typically be specified as the root, and each source_field would point to something within , such as Lexeme Form, Gloss, Illustration, etc. 51 | 52 | grab: Typically, you should specify grab="first" for each field, in which case only the first matching value will be copied over. But for non-media fields, you may also specify grab="concat_all", in which case multiple values will be concatenated (separated by a separator such as ;;). In the sample config file, multiple glosses and multiple definitions are handled using concat_all. 53 | 54 | autodelete: This should be set to "false" since this feature is not yet implemented. 55 | 56 | 57 | ASSUMPTIONS AND LIMITATIONS: 58 | - Each sync operation will OVERWRITE the matching data in any matching Anki note. This is intentional, allowing you to do all of your editing externally rather than doing duplicate work in Anki. 59 | - Currently, *all* matching records are imported from the source file. If you want a filtered subset, you'll need to trim down the XML file. (E.g. in FLEx, you can choose to filter down to a desired subset before exporting to LIFT XML.) 
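If you would rather not maintain a filter in FLEx, a small standalone script can also do the trimming before you run the sync. The sketch below is illustrative only and is not part of the addon: it assumes each entry carries a trait such as <trait name="Difficulty" value="DIFC01"/> (the real element and attribute names depend on how your custom field is exported to LIFT), and it writes a reduced copy of the LIFT file for the addon to read.

    # filter_lift.py -- illustrative helper, not shipped with the addon.
    # Keeps only the <entry> elements carrying a matching trait and writes a trimmed copy.
    import xml.etree.ElementTree as ET

    def filter_lift(src_path, dst_path, trait_name, wanted_value):
        tree = ET.parse(src_path)
        lift = tree.getroot()                      # the <lift> root element
        for entry in list(lift.findall('entry')):  # copy the list; we remove as we go
            traits = entry.findall("trait[@name='{}']".format(trait_name))
            if not any(t.get('value') == wanted_value for t in traits):
                lift.remove(entry)                 # drop non-matching entries
        tree.write(dst_path, encoding='utf-8', xml_declaration=True)

    # e.g. filter_lift('Lindu-Test.lift', 'Lindu-Test-level1.lift', 'Difficulty', 'DIFC01')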
60 | 61 | 62 | DEVELOPER NOTES: 63 | - Standard XPath syntax is used in this config file, except that quotes need to be escaped, either using the standard "ws" syntax, or using my non-standard {ws} syntax. (The addon supports the latter for convenience/readability.) 64 | - The XPath actually used by the sync software will consist of (a) the base_path, plus (b) the field's own xpath fragment, plus (c) descendant-or-self::text() . 65 | - Anything involving links would be hard to pull in cleanly, since these are stored as (long) IDs in LIFT. XPath has no way of 66 | knowing how to follow LIFT links. However, the first part of the ID is usually readable, so pulling the whole thing into 67 | an Anki field might be barely usable, though ugly, as in this example for synonyms. Ditto for other links (e.g. confer). 68 | 69 | 70 | 71 | A link to "absuwelto" would display like this: absuwelto_e50fee0e-45d3-43d3-bc71-b6cd4983607c 72 | 73 | Probably not worth it. And doing this for variants or complex forms specifically is even trickier, since distinguishing between 74 | them would require looking deeper down first, into the nested element, to decide whether or not its parent is a match. 75 | 76 | - The XPath syntax supported by this addon is all standard, with one tweak: since these xpaths are embedded here 77 | in XML elements, quotation marks should not be used directly. Rather than using " every time, curly brackets are supported. 78 | 79 | - I think the following ought to work, but doesn't; the syntax for reading attributes instead of filtering on them 80 | seems poorly supported (though it works for the root element's guid attribute): 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /User 1/collection.media/blank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/User 1/collection.media/blank.txt -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Anki addon that performs a one-way sync, pulling data into Anki 3 | from the source XML file(s) as specified in the config file. 4 | """ 5 | 6 | from syncxml import SyncFromXML 7 | 8 | -------------------------------------------------------------------------------- /anki notes suggestions.txt: -------------------------------------------------------------------------------- 1 | 2 | I took a course in Second Language Acquisition as part of a degree in linguistics, and the primary strategy we were taught for learning new vocabulary was TPR (Total Physical Response). This generally involves pointing at something or making some physical movement as a correct response to what a native speaker says as part of a practice drill. Example: while looking at a grid of four (or more) photos, the trainer describes something about one of them, and the listener points to the correct photo. 3 | 4 | That is the approach that Rosetta Stone takes, and it is very effective. However, I think it could be much more effective in an SRS context such as Anki, where you don't have to repeat an entire lesson just to work on the one or two hard-to-memorize words in that lesson. 5 | 6 | In Anki, I think the trick would be to keep selecting four (or more) cards that all would 'work' in a given drill. 
In the example above, four cards that contained both audio and an image would do nicely; just one of the four audio clips would be played, and the user would click on the corresponding image. Here's a plain-text example: an English word is prominently displayed center-screen, and the user clicks on one of four tiles containing a foreign-language word, hopefully the one with that English meaning. 7 | 8 | 9 | C:\Documents and Settings\user57\Application Data\.anki\plugins 10 | D:\files\workspace\AnkiFromXML 11 | C:\Program Files\Anki\xml 12 | anki-2.0.9 13 | 14 | ----- 15 | 16 | Okay, I found this page again (which had previously seemed unnecessary for plugin development): 17 | http://ankisrs.net/docs/WindowsDevEnvironment.html 18 | 19 | I guess I need to bite the bullet and go through all of those steps. But do I really need to use Python 2.6.x? I've already got 2.7.2 installed (as well as 3.2). 20 | 21 | Jon 22 | 23 | donate: 24 | Win Media Player plugin? 25 | 26 | 27 | creating Qt interfaces for Python apps: 28 | http://query7.com/learning-python-qt-programming-with-pyqt-pyside 29 | examples 30 | http://www.cs.usfca.edu/~afedosov/qttut/ (good basic tutorial) 31 | http://diotavelli.net/PyQtWiki/Creating_GUI_Applications_with_PyQt_and_Qt_Designer 32 | 33 | Reply to this thread? 34 | "it would be a lot easier if there was more documentation" 35 | http://groups.google.com/group/ankisrs/browse_thread/thread/9a4226454d629cb6?pli=1 36 | 37 | 38 | 39 | Where hooks are and aren't in Anki: 40 | http://groups.google.com/group/ankisrs/browse_thread/thread/41627e4fc4ab965a/953279aa7cefc3b6?lnk=gst&q=plugin&pli=1 41 | 42 | My thread: 43 | http://groups.google.com/group/ankisrs/browse_thread/thread/7cda32eff95a78ca/6caae97f05cc2767?lnk=gst&q=jon+c# 44 | 45 | Two posts where I mentioned my plans: 46 | http://groups.google.com/group/flex-list/browse_thread/thread/aaabc329bd542bc1# 47 | http://groups.google.com/group/flex-list/browse_thread/thread/97b9d6ef80a728e2/217716f873cf5a8c?lnk=gst&q=keyes# 48 | 49 | The LIFT list: 50 | http://groups.google.com/group/LexiconInterchangeFormat 51 | LIFTDotNet (some C# code for parsing LIFT): 52 | http://lift-standard.googlecode.com/svn/trunk/LIFTDotNet/ 53 | 54 | Spanish deck note: "ALSO, because these cards were added at the end, the deck is now out of order. 55 | This isn't a major problem (just means you'll get all the errored cards at the end), 56 | but if you want to avoid this (recommended), make inactive tags 2000-10000 when you're 57 | on the first 1000, 3000-10000 when on the second 2000, etc." 58 | 59 | Other decks of interest: 60 | IPA 61 | Arabic alphabet 62 | U.S. state capitls w screenshots 63 | hi freq Hebrew vocab 64 | Greek 65 | US trees 66 | The Indonesian way 67 | 68 | Cards to add to lifedeck: 69 | Lindu team members 70 | Kulawi team members 71 | Kulawi kepala desa 72 | supporters 73 | Bethany/Calvary/WBC friends 74 | KD translator in Toraja 75 | 76 | plugins: 77 | - hello world 78 | - *For developers only: batch processing plugin example 79 | - search fields 80 | - tolerant spell-check 81 | - improved card search 82 | - display all the answers to a question (i.e. for all senses? homographs?) 83 | - bulk cloze 84 | - find homonyms/synonyms 85 | - read: Anki reset 86 | - mark on delete 87 | 88 | 89 | MODEL: 90 | lexeme form (prevent empty) 91 | hom num? 92 | variants? 93 | lexeme form audio 94 | meanings (concat of glosses and s.t. defs) 95 | defs? 96 | example text 97 | more examples? 
98 | example audio 99 | image 100 | 101 | Use lxml because it supports XPath and auto-validation from an RNG file 102 | to strip formatting, mixed-WS stuff, etc. use this XPath: 103 | descendant-or-self::text() 104 | that will return a list aa, for which you can "".join(aa) 105 | http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/xpath.html 106 | 107 | a- 108 | 109 | probably want to parse iteratively; here are some tips: 110 | http://www.ibm.com/developerworks/xml/library/x-hiperfparse/ 111 | 112 | XPath notes... 113 | drink//price 114 | From the descendants of "drink", select all "price" elements. 115 | inventory/drink/pop@supplier 116 | Selects the supplier attribute of pop 117 | amount/.. 118 | This expression uses a relative path location and selects all of the amount elements in our XML document. It then uses the parent sequence ".." to select the parent element of each amount element. In our XML document, there are three amount elements, and their parents are lemonade, pop and chips 119 | inventory/snack/chips/* 120 | selects all children of chips 121 | inventory/*/*/* 122 | selects all great-grandchildren of inventory (I think) 123 | more: http://www.tizag.com/xmlTutorial/xpathbar.php 124 | 125 | --------------------------------------- 126 | PLUGIN DEVELOPMENT 127 | 128 | READ THIS: 129 | http://ankisrs.net/docs/dev/addons.html 130 | 131 | 132 | --------------------------------------- 133 | PLUGIN DEVELOPMENT - JON'S NOTES 134 | 135 | - Using File, Download, Shared Plugin, download a sample plugin. 136 | - Choose Settings, Plugins, Open Plugin Folder, and then open the plugin's code in a text editor. 137 | - Review it to see how it works and if it looks safe, then restart Anki and try out the new functionality. 138 | - Anki runs (imports? loads?) the plugin code once when you start Anki, so if you need to edit the plugin code, you'll need to save it and restart Anki in order for your changes to take effect. 139 | - To create your own plugin, just write some code and save it into that same folder. Or, copy the sample plugin's file and adapt it to your needs. 140 | 141 | SAMPLE PLUGINS 142 | 143 | "Hello World" is a very simple plugin that does one thing: During a review, it interposes a custom event handler that will pop up a "Hello world!" message box if the user pressed the "r" key. After checking for this "r" keystroke, it passes control back to the default event handler. 144 | 145 | "For developers only: batch processing plugin example" functions in a different context--it runs a process (which you need to write yourself) on every currently-selected fact in the Browser window. The filename ends in ".py.OFF" by default, so you'll need to delete the ".OFF" part before launching Anki, or else Anki can't load the plugin. And the plugin won't actually do anything to your selected facts until you write code where the commented-out section is. Proceed carefully! If you insert the following snippet of code there, it will append a period to the Front field in every selected fact. 146 | for fact in facts: fact["Front"] += "." 147 | 148 | API AND SOURCE CODE 149 | 150 | The plugins that are out there give practical examples of how to use the Anki API, but that API is not yet documented, so to really understand it, you may want to download the Anki source code, and look at the following modules, perhaps in the following order: 151 | - 152 | - ??? 
153 | 154 | Hint-peeking 155 | Image Download 156 | Learn Mode 157 | ?Interval Tweak 158 | 159 | consider also installing: 160 | Resize Images 161 | Speak Slower 162 | Custom Media Player 2 (needed for non-ASCII filenames?) 163 | ?Reset Creation Times 164 | 165 | v2: 166 | consider something like Real-Time Import for auto-update from FLEx/WeSay. 167 | 168 | -------------------------------------------------------------------------------- /flashgrab-v0.8.7.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/flashgrab-v0.8.7.zip -------------------------------------------------------------------------------- /icons/flashgrid-flashgrab-icons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/icons/flashgrid-flashgrab-icons.png -------------------------------------------------------------------------------- /icons/readme.txt: -------------------------------------------------------------------------------- 1 | These are some icon ideas I created a while back. 2 | 3 | I'm not sure how useful it would be to have these showing on our dialogs, etc. 4 | 5 | If so, it would be good to see whether the star shape impinges too much on the official Anki icon. 6 | 7 | -Jon -------------------------------------------------------------------------------- /readme-developers.txt: -------------------------------------------------------------------------------- 1 | Here is some info on getting started developing this addon, or Anki in general. 2 | 3 | Much of it--but not all--is focused on setting up a Linux VM as the dev/test machine, although with a few manual steps dev can be done on Windows too. 4 | 5 | https://github.com/sillsdev/flashgrab/wiki/How-to-run-and-debug-Anki-in-a-Linux-virtual-machine-or-in-Windows 6 | -------------------------------------------------------------------------------- /sample-src/Indonesian Vocab 2015-01-30 1445 tagged-in-notes.fwbackup: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/sample-src/Indonesian Vocab 2015-01-30 1445 tagged-in-notes.fwbackup -------------------------------------------------------------------------------- /sample-src/Indonesian Vocab 2015-02-04 2149 updated media.fwbackup: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/sample-src/Indonesian Vocab 2015-02-04 2149 updated media.fwbackup -------------------------------------------------------------------------------- /sample-src/readme-indo-sample.txt: -------------------------------------------------------------------------------- 1 | This is a backup of the master project in FLEx that was used 2 | for all the editing of this deck. 3 | 4 | It was 5 | - exported to LIFT, 6 | - synced one-way into Anki using FlashGrab, and then 7 | - the records in Anki were manually tagged based on the Notes (Comment) field. 8 | -------------------------------------------------------------------------------- /syncx_local.py: -------------------------------------------------------------------------------- 1 | ''' A test script. 
2 | @author: Jonathan Coombs, copyright sil.org 2012-2014 3 | ''' 4 | 5 | #Force Python 3 syntax 6 | 7 | 8 | 9 | from syncxml import local_launch 10 | from syncxml import xml_util as X 11 | from syncxml import syncxml as SX 12 | from syncxml import logg as L 13 | 14 | import os 15 | 16 | def get_path(name): 17 | f = SX.get_first_lift_file(os.path.join("..", "tests", name)) 18 | return f 19 | 20 | def get_lift_paths(): 21 | L = os.listdir(os.path.join("..", "tests")) 22 | L2 = [get_path(x) for x in L] 23 | return L2 24 | 25 | 26 | 27 | def count_ws(): 28 | pass 29 | 30 | def tests(): 31 | """Some quick and dirty tests on auto-configuring and reading some actual LIFT files. 32 | Note: on my test machine, some of these have corresponding FLEx media folders, here: 33 | C:\ProgramData\SIL\FieldWorks\Projects\Bambara flashcards\LinkedFiles 34 | C:\ProgramData\SIL\FieldWorks\Projects\Moma-temp\LinkedFiles""" 35 | L.init(SX.LOG_FP, L.VERBOSITY_DEBUG) 36 | paths = get_lift_paths() 37 | #expected = dict() 38 | #expected['cgg'] = 39 | from time import sleep 40 | for p in paths: 41 | """ WAS: 42 | # analyze the WSes in this file 43 | size = os.path.getsize(p) 44 | L.debug("File {} is {} bytes".format(p, size)) 45 | lift_few = X.get_lift_subset(p, 30) # .tmp 46 | langs = X.XmlLiftLangs(lift_few) 47 | v = langs.vern_ws 48 | n = langs.nat_ws 49 | os.remove(lift_few) 50 | """ 51 | local_launch.main(p) 52 | sleep(0.5) # give Notepad a chance to open 53 | 54 | #count_ws() 55 | L.close_file() 56 | 57 | 58 | if (__name__ == '__main__'): 59 | # If we're here, we're not running inside Anki, but we can still test/debug various pieces... 60 | os.chdir('syncxml') # go down one level, so we're in the same relative position as the Anki add-on 61 | 62 | tests() 63 | local_launch.main() 64 | -------------------------------------------------------------------------------- /syncxml/README.md: -------------------------------------------------------------------------------- 1 | FlashGrab (a.k.a. lexml2anki, SyncFromXml, etc.) 2 | ========== 3 | 4 | An Anki addon for pulling flashcard data (one-way sync) from XML. Optimized for LIFT XML (from WeSay or FLEx). 5 | See the Anki website's list of provided addons. 6 | https://ankiweb.net/shared/info/915685712 7 | 8 | .. 
-------------------------------------------------------------------------------- /syncxml/SyncFromXML_config_default.txt: -------------------------------------------------------------------------------- 1 |  2 | 3 | 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 71 | 72 | -------------------------------------------------------------------------------- /syncxml/SyncFromXML_help.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/SyncFromXML_help.pdf -------------------------------------------------------------------------------- /syncxml/SyncFromXML_log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/SyncFromXML_log.txt -------------------------------------------------------------------------------- /syncxml/SyncFromXML_readme.txt: -------------------------------------------------------------------------------- 1 | The "Sync one-way from XML" addon for Anki pulls data into Anki from one or more external XML sources, as 2 | determined by a config file. It does not modify fields that are specific to Anki (e.g. it does not reset any existing 3 | flashcards), but it WILL OVERWRITE any edits that you've made within Anki to the data that was pulled in. The 4 | assumption is that you want to do your editing elsewhere and simply pull the latest data into Anki. 5 | 6 | Note: if you'll be importing LIFT data, the first thing the auto-config does is import the 7 | .apkg file. (It is located in the samples folder next to this readme file; that is, in 8 | %userprofile%\My Documents\Anki\addons\syncxml\samples .) This will put the needed structures in place so that 9 | there are enough fields in the Anki deck to receive several different dictionary fields. 10 | 11 | After that, the wizard will try to auto-configure itself for your LIFT data. 12 | 13 | The rest of this readme file describes the details of the config file. For general documentation, see 14 | SyncFromXML_help.pdf instead. (You may find also find the log file useful for troubleshooting during the initial 15 | configuration.) 16 | 17 | CONFIG DETAILS: 18 | 19 | The configuration (SyncFromXML_config.txt) file tells the addon: 20 | - where to find the XML file to pull its data from, 21 | - where precisely the data is located within that source file, and 22 | - where to store it in Anki. 23 | 24 | Before running the addon, try editing the config file by specifying the source file paths and carefully replacing the writing system codes between the " or {} marks. Also make sure that the correct model / note type (e.g. DICT_LIFT) is available in Anki (the easiest way to do this is to import the preconfigured sample deck), and make sure that 'deckname' refers to the Anki deck you wish to copy your data into. This should be enough to start pulling data in from your LIFT file. 25 | 26 | 27 | CONFIGURATION OPTIONS: 28 | 29 | source_file, source, source_field: To start pulling data into Anki, the source_file path needs to point to an XML file, and there needs to be a valid 'source' element containing at least one valid 'source_field' element. You can supply multiple source files if you wish, by specifying multiple 'source' elements. 
(And a single source file may be used more than once.) 30 | 31 | To load from the addon's 'samples' subfolder, just a short relative path will suffice for each path. Example: 32 | source_file="samples/Lindu-Test.lift" 33 | source_audio_folder="samples/audio" 34 | source_image_folder="samples/pictures" > 35 | But assuming your own source file is located elsewhere, you'll want to specify absolute paths. Example: 36 | source_file="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\Lindu-Test.lift" 37 | source_audio_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\audio" 38 | source_image_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\pictures" > 39 | 40 | audio_folder, image_folder: The base paths for finding any media files whose links are not already stored as absolute paths in the XML file. 41 | 42 | base_xpath: The base_xpath for each source matches on the root element that will correspond to one Anki note, and all source_field elements must be relative to that base path, representing the fields that will be copied over into that note. For example, given a LIFT file as input, the element would typically be specified as the root, and each source_field would point to something within , such as Lexeme Form, Gloss, Illustration, etc. 43 | 44 | grab: Typically, you should specify grab="first" for each field, in which case only the first matching value will be copied over. But for non-media fields, you may also specify grab="concat_all", in which case multiple values will be concatenated (separated by a separator such as ;;). In the sample config file, multiple glosses and multiple definitions are handled using concat_all. 45 | 46 | autodelete: This should be set to "false" since this feature is not yet implemented. 47 | 48 | 49 | ASSUMPTIONS AND LIMITATIONS: 50 | - Each sync operation will OVERWRITE the matching data in any matching Anki note. This is intentional, allowing you to do all of your editing externally rather than doing duplicate work in Anki. 51 | - Currently, *all* matching records are imported from the source file. If you want a filtered subset, you'll need to trim down the XML file. (E.g. in FLEx, you can choose to filter down to a desired subset before exporting to LIFT XML.) 52 | 53 | 54 | DEVELOPER NOTES: 55 | - Standard XPath syntax is used in this config file, except that quotes need to be escaped, either using the standard "ws" syntax, or using my non-standard {ws} syntax. (The addon supports the latter for convenience/readability.) 56 | - The XPath actually used by the sync software will consist of (a) the base_path, plus (b) the field's own xpath fragment, plus (c) descendant-or-self::text() . 57 | - Anything involving links would be hard to pull in cleanly, since these are stored as (long) IDs in LIFT. XPath has no way of 58 | knowing how to follow LIFT links. However, the first part of the ID is usually readable, so pulling the whole thing into 59 | an Anki field might be barely usable, though ugly, as in this example for synonyms. Ditto for other links (e.g. confer). 60 | 61 | A link to "absuwelto" would display like this: absuwelto_e50fee0e-45d3-43d3-bc71-b6cd4983607c 62 | 63 | Probably not worth it. And doing this for variants or complex forms specifically is even trickier, since distinguishing between 64 | them would require looking deeper down first, into the nested element, to decide whether or not its parent is a match. 
65 | 66 | - The XPath syntax supported by this addon is all standard, with one tweak: since these xpaths are embedded here 67 | in XML elements, quotation marks should not be used directly. Rather than using " every time, curly brackets are supported. 68 | 69 | - I think the following ought to work, but doesn't; the syntax for reading attributes instead of filtering on them 70 | seems poorly supported (though it works for the root element's guid attribute): 71 | 72 | -------------------------------------------------------------------------------- /syncxml/__init__.py: -------------------------------------------------------------------------------- 1 | from syncxml import SyncFromXML -------------------------------------------------------------------------------- /syncxml/anki_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code for interacting with Anki. If running locally (outside Anki), 3 | very little will work, though it does fall back to local folders a bit. 4 | ''' 5 | 6 | #Force Python 3 syntax 7 | 8 | import os, sys 9 | 10 | try: 11 | from aqt import mw 12 | IN_ANKI = True 13 | #anki_user_profile = mw.pm.profileFolder() #can't do this yet; mw is still null 14 | from anki.importing import AnkiPackageImporter 15 | except ImportError: 16 | IN_ANKI = False 17 | anki_user_profile = '../User 1' #'D:\\files\\user57\\documents\\Anki\\User 1' 18 | 19 | ANKI_MEDIA_FOLDER = 'collection.media' 20 | 21 | ADDON_FOLDER = 'syncxml' 22 | APKG_PATH = os.path.join("samples", "lift-dictionary.apkg") 23 | NO_MODEL_INSTR = "\nSteps: Restart Anki and make sure the target deck exists. Use Tools, Manage Note Types, or go to File Import, and import Anki/addons/syncxml/{}. Or, reconfigure.)".format(APKG_PATH) 24 | NO_MODEL_MSG = "The target model with the fields we needed was missing; have attempted to re-import the default APKG but an error occurred. \nPlease try again, or create it manually. \n{}".format(NO_MODEL_INSTR) 25 | TARGET_DECK = "lift-dictionary" 26 | 27 | def anki_model_exists(model): 28 | if (mw.col.models.byName(model)): 29 | return True 30 | return False 31 | 32 | 33 | 34 | def addons_folder(): 35 | fld = ".." 36 | if IN_ANKI: 37 | fld = mw.pm.addonFolder() 38 | return fld 39 | 40 | def get_filepath(f): 41 | return os.path.join(addons_folder(), ADDON_FOLDER, f) 42 | 43 | 44 | 45 | def get_anki_fields(modelname): #(targ, source): 46 | """Given a model name, return a list/set of its fields.""" 47 | 48 | n = ['Lexeme Form', 49 | 'Citation Form', 50 | 'Lex GUID', 51 | 'Lex Audio', 52 | 'Grammatical Info', 53 | 'Glosses', 54 | 'Definitions', 55 | 'Reversals', 56 | 'Picture', 57 | 'Example', 58 | 'Example Audio', 59 | 'Example Translation', 60 | 'xRxeversalxs'] 61 | if not IN_ANKI: 62 | return n #return hard-coded value 63 | 64 | model = mw.col.models 65 | mdict = model.byName(modelname) 66 | n = [x['name'] for x in mdict['flds']] 67 | return n 68 | 69 | def add_field(key, modelname): 70 | model = mw.col.models 71 | mdict = model.byName(modelname) 72 | model.setCurrent(mdict) 73 | f = model.newField(key) 74 | #mw.col.models.flush() 75 | model.addField(mdict, f) 76 | model.save() 77 | 78 | def import_apkg_model(path, delete=False): 79 | #If delete==True, our sole purpose is to import the note types and flashcard templates, not the data. 
80 | 81 | imp = AnkiPackageImporter(mw.col, path) 82 | imp.run() 83 | mw.col.models.flush() 84 | mw.reset(True) # refresh the screen so that the newly imported deck is visible 85 | 86 | if delete: 87 | # Delete ALL cards/notes in the LIFT deck. Ignore errors 88 | try: 89 | # deck = mw.col.decks.byName(TARGET_DECK) #hmm, this returns a Python dict(); how to get the deck object? 90 | # assert deck.cardCount() > 0 91 | # assert deck.noteCount() > 0 92 | ids = mw.col.findCards("deck:{}".format(TARGET_DECK)) 93 | mw.col.remCards(ids, True) 94 | # assert deck.cardCount() == 0 95 | # assert deck.noteCount() == 0 96 | except: 97 | return "Failed to delete cards and notes after importing the APKG file." 98 | 99 | mw.col.models.flush() 100 | mw.reset(True) 101 | return "" 102 | -------------------------------------------------------------------------------- /syncxml/docsrc/SyncFromXML_help.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/SyncFromXML_help.odt -------------------------------------------------------------------------------- /syncxml/docsrc/checklog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/checklog.png -------------------------------------------------------------------------------- /syncxml/docsrc/customtype.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/customtype.png -------------------------------------------------------------------------------- /syncxml/docsrc/customtype_sm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/customtype_sm.png -------------------------------------------------------------------------------- /syncxml/docsrc/flex_export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/flex_export.png -------------------------------------------------------------------------------- /syncxml/docsrc/menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/menu.png -------------------------------------------------------------------------------- /syncxml/docsrc/report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/report.png -------------------------------------------------------------------------------- /syncxml/docsrc/sync-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/sync-report.png -------------------------------------------------------------------------------- /syncxml/docsrc/wizard-done.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard-done.png -------------------------------------------------------------------------------- /syncxml/docsrc/wizard-examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard-examples.png -------------------------------------------------------------------------------- /syncxml/docsrc/wizard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard1.png -------------------------------------------------------------------------------- /syncxml/docsrc/wizard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard2.png -------------------------------------------------------------------------------- /syncxml/file_util.py: -------------------------------------------------------------------------------- 1 | """distutils.file_util 2 | 3 | Utility functions for operating on single files. 4 | """ 5 | 6 | __all__ = [ 7 | # public symbols 8 | "copy_file", 9 | "move_file", 10 | "write_file", 11 | ] 12 | 13 | __revision__ = "$Id$" 14 | 15 | import os 16 | from distutils.errors import DistutilsFileError 17 | from distutils import log 18 | 19 | # for generating verbose output in 'copy_file()' 20 | _copy_action = {None: 'copying', 21 | 'hard': 'hard linking', 22 | 'sym': 'symbolically linking'} 23 | 24 | 25 | def _copy_file_contents(src, dst, buffer_size=16*1024): 26 | """Copy the file 'src' to 'dst'. 27 | 28 | Both must be filenames. Any error opening either file, reading from 29 | 'src', or writing to 'dst', raises DistutilsFileError. Data is 30 | read/written in chunks of 'buffer_size' bytes (default 16k). No attempt 31 | is made to handle anything apart from regular files. 32 | """ 33 | # Stolen from shutil module in the standard library, but with 34 | # custom error-handling added. 
35 | fsrc = None 36 | fdst = None 37 | try: 38 | try: 39 | fsrc = open(src, 'rb') 40 | except os.error as xxx_todo_changeme3: 41 | (errno, errstr) = xxx_todo_changeme3.args 42 | raise DistutilsFileError("could not open '%s': %s" % (src, errstr)) 43 | 44 | if os.path.exists(dst): 45 | try: 46 | os.unlink(dst) 47 | except os.error as xxx_todo_changeme: 48 | (errno, errstr) = xxx_todo_changeme.args 49 | raise DistutilsFileError( 50 | "could not delete '%s': %s" % (dst, errstr)) 51 | 52 | try: 53 | fdst = open(dst, 'wb') 54 | except os.error as xxx_todo_changeme4: 55 | (errno, errstr) = xxx_todo_changeme4.args 56 | raise DistutilsFileError( 57 | "could not create '%s': %s" % (dst, errstr)) 58 | 59 | while 1: 60 | try: 61 | buf = fsrc.read(buffer_size) 62 | except os.error as xxx_todo_changeme1: 63 | (errno, errstr) = xxx_todo_changeme1.args 64 | raise DistutilsFileError( 65 | "could not read from '%s': %s" % (src, errstr)) 66 | 67 | if not buf: 68 | break 69 | 70 | try: 71 | fdst.write(buf) 72 | except os.error as xxx_todo_changeme2: 73 | (errno, errstr) = xxx_todo_changeme2.args 74 | raise DistutilsFileError( 75 | "could not write to '%s': %s" % (dst, errstr)) 76 | 77 | finally: 78 | if fdst: 79 | fdst.close() 80 | if fsrc: 81 | fsrc.close() 82 | 83 | def copy_file(src, dst, preserve_mode=1, preserve_times=1, update=0, 84 | link=None, verbose=1, dry_run=0): 85 | """Copy a file 'src' to 'dst'. 86 | 87 | If 'dst' is a directory, then 'src' is copied there with the same name; 88 | otherwise, it must be a filename. (If the file exists, it will be 89 | ruthlessly clobbered.) If 'preserve_mode' is true (the default), 90 | the file's mode (type and permission bits, or whatever is analogous on 91 | the current platform) is copied. If 'preserve_times' is true (the 92 | default), the last-modified and last-access times are copied as well. 93 | If 'update' is true, 'src' will only be copied if 'dst' does not exist, 94 | or if 'dst' does exist but is older than 'src'. 95 | 96 | 'link' allows you to make hard links (os.link) or symbolic links 97 | (os.symlink) instead of copying: set it to "hard" or "sym"; if it is 98 | None (the default), files are copied. Don't set 'link' on systems that 99 | don't support it: 'copy_file()' doesn't check if hard or symbolic 100 | linking is available. 101 | 102 | Under Mac OS, uses the native file copy function in macostools; on 103 | other systems, uses '_copy_file_contents()' to copy file contents. 104 | 105 | Return a tuple (dest_name, copied): 'dest_name' is the actual name of 106 | the output file, and 'copied' is true if the file was copied (or would 107 | have been copied, if 'dry_run' true). 108 | """ 109 | # XXX if the destination file already exists, we clobber it if 110 | # copying, but blow up if linking. Hmmm. And I don't know what 111 | # macostools.copyfile() does. Should definitely be consistent, and 112 | # should probably blow up if destination exists and we would be 113 | # changing it (ie. it's not already a hard/soft link to src OR 114 | # (not update) and (src newer than dst). 
115 | 116 | from distutils.dep_util import newer 117 | from stat import ST_ATIME, ST_MTIME, ST_MODE, S_IMODE 118 | 119 | if not os.path.isfile(src): 120 | raise DistutilsFileError( 121 | "can't copy '%s': doesn't exist or not a regular file" % src) 122 | 123 | if os.path.isdir(dst): 124 | dir = dst 125 | dst = os.path.join(dst, os.path.basename(src)) 126 | else: 127 | dir = os.path.dirname(dst) 128 | 129 | if update and not newer(src, dst): 130 | if verbose >= 1: 131 | log.debug("not copying %s (output up-to-date)", src) 132 | return dst, 0 133 | 134 | try: 135 | action = _copy_action[link] 136 | except KeyError: 137 | raise ValueError("invalid value '%s' for 'link' argument" % link) 138 | 139 | if verbose >= 1: 140 | if os.path.basename(dst) == os.path.basename(src): 141 | log.info("%s %s -> %s", action, src, dir) 142 | else: 143 | log.info("%s %s -> %s", action, src, dst) 144 | 145 | if dry_run: 146 | return (dst, 1) 147 | 148 | # If linking (hard or symbolic), use the appropriate system call 149 | # (Unix only, of course, but that's the caller's responsibility) 150 | if link == 'hard': 151 | if not (os.path.exists(dst) and os.path.samefile(src, dst)): 152 | os.link(src, dst) 153 | elif link == 'sym': 154 | if not (os.path.exists(dst) and os.path.samefile(src, dst)): 155 | os.symlink(src, dst) 156 | 157 | # Otherwise (non-Mac, not linking), copy the file contents and 158 | # (optionally) copy the times and mode. 159 | else: 160 | _copy_file_contents(src, dst) 161 | if preserve_mode or preserve_times: 162 | st = os.stat(src) 163 | 164 | # According to David Ascher , utime() should be done 165 | # before chmod() (at least under NT). 166 | if preserve_times: 167 | os.utime(dst, (st[ST_ATIME], st[ST_MTIME])) 168 | if preserve_mode: 169 | os.chmod(dst, S_IMODE(st[ST_MODE])) 170 | 171 | return (dst, 1) 172 | 173 | # XXX I suspect this is Unix-specific -- need porting help! 174 | def move_file (src, dst, verbose=1, dry_run=0): 175 | """Move a file 'src' to 'dst'. 176 | 177 | If 'dst' is a directory, the file will be moved into it with the same 178 | name; otherwise, 'src' is just renamed to 'dst'. Return the new 179 | full name of the file. 180 | 181 | Handles cross-device moves on Unix using 'copy_file()'. What about 182 | other systems??? 
183 | """ 184 | from os.path import exists, isfile, isdir, basename, dirname 185 | import errno 186 | 187 | if verbose >= 1: 188 | log.info("moving %s -> %s", src, dst) 189 | 190 | if dry_run: 191 | return dst 192 | 193 | if not isfile(src): 194 | raise DistutilsFileError("can't move '%s': not a regular file" % src) 195 | 196 | if isdir(dst): 197 | dst = os.path.join(dst, basename(src)) 198 | elif exists(dst): 199 | raise DistutilsFileError( 200 | "can't move '%s': destination '%s' already exists" % 201 | (src, dst)) 202 | 203 | if not isdir(dirname(dst)): 204 | raise DistutilsFileError( 205 | "can't move '%s': destination '%s' not a valid path" % \ 206 | (src, dst)) 207 | 208 | copy_it = 0 209 | try: 210 | os.rename(src, dst) 211 | except os.error as xxx_todo_changeme6: 212 | (num, msg) = xxx_todo_changeme6.args 213 | if num == errno.EXDEV: 214 | copy_it = 1 215 | else: 216 | raise DistutilsFileError( 217 | "couldn't move '%s' to '%s': %s" % (src, dst, msg)) 218 | 219 | if copy_it: 220 | copy_file(src, dst, verbose=verbose) 221 | try: 222 | os.unlink(src) 223 | except os.error as xxx_todo_changeme5: 224 | (num, msg) = xxx_todo_changeme5.args 225 | try: 226 | os.unlink(dst) 227 | except os.error: 228 | pass 229 | raise DistutilsFileError( 230 | ("couldn't move '%s' to '%s' by copy/delete: " + 231 | "delete '%s' failed: %s") % 232 | (src, dst, src, msg)) 233 | return dst 234 | 235 | 236 | def write_file (filename, contents): 237 | """Create a file with the specified name and write 'contents' (a 238 | sequence of strings without line terminators) to it. 239 | """ 240 | f = open(filename, "w") 241 | try: 242 | for line in contents: 243 | f.write(line + "\n") 244 | finally: 245 | f.close() 246 | -------------------------------------------------------------------------------- /syncxml/flex_util.py: -------------------------------------------------------------------------------- 1 | #Force Python 3 syntax 2 | 3 | from . import logg as L 4 | import os 5 | 6 | FLEX_DIR = r"C:\ProgramData\SIL\FieldWorks\Projects" 7 | FLEX_DIR_XP = r"C:\Documents and Settings\All Users\Application Data\SIL\FieldWorks\Projects" 8 | FLEX_DIR_LINUX = r"~/.local/share/fieldworks/Projects" 9 | LINKED_FILES = "LinkedFiles" 10 | FLEX_AUDIO = "AudioVisual" 11 | FLEX_IMAGE = "Pictures" 12 | 13 | def flex_dir(): 14 | d = "" 15 | tmp = os.path.expanduser(FLEX_DIR_LINUX) 16 | if os.path.exists(tmp): 17 | d = tmp 18 | elif os.path.exists(FLEX_DIR): # Vista and later have priority 19 | d = FLEX_DIR 20 | elif os.path.exists(FLEX_DIR_XP): 21 | d = FLEX_DIR_XP 22 | return d 23 | 24 | def flex_media(f, dir=""): 25 | """Given an absolute filepath such as c:\files\Catalan.lift, and the FLEx project folder, 26 | check whether audio and image folders are likely to be available under, say, 27 | C:\ProgramData\SIL\FieldWorks\Projects\Catalan\LinkedFiles 28 | Or, given just an absolute path, assume it's an fwdata file and deduce accordingly. 29 | """ 30 | media_dir = "" 31 | if dir: 32 | base = os.path.split(f)[1] # e.g. just Catalan.lift 33 | proj = os.path.splitext(base)[0] # e.g. 
just Catalan 34 | media_dir = os.path.join(dir, proj, LINKED_FILES) 35 | else: 36 | # assume .fwdata 37 | dir, _f = os.path.split(f) 38 | media_dir = os.path.join(dir, LINKED_FILES) 39 | 40 | media_dir = os.path.normpath(media_dir) #fix any inconsistent slashes 41 | L.debug("Selected file is {}.\n Checking to see if media_dir {} exists...".format(f, media_dir)) 42 | if os.path.exists(media_dir): 43 | # Simply assume both of the following do or will exist 44 | au = os.path.join(media_dir, FLEX_AUDIO) 45 | im = os.path.join(media_dir, FLEX_IMAGE) 46 | L.debug(" It does. choosing {} and {}".format(au, im)) 47 | return (au, im) 48 | else: 49 | L.debug(" It does not exist.") 50 | return None 51 | 52 | -------------------------------------------------------------------------------- /syncxml/glob.py: -------------------------------------------------------------------------------- 1 | """Filename globbing utility.""" 2 | 3 | import sys 4 | import os 5 | import re 6 | import fnmatch 7 | 8 | try: 9 | _unicode = str 10 | except NameError: 11 | # If Python is built without Unicode support, the unicode type 12 | # will not exist. Fake one. 13 | class _unicode(object): 14 | pass 15 | 16 | __all__ = ["glob", "iglob"] 17 | 18 | def glob(pathname): 19 | """Return a list of paths matching a pathname pattern. 20 | 21 | The pattern may contain simple shell-style wildcards a la 22 | fnmatch. However, unlike fnmatch, filenames starting with a 23 | dot are special cases that are not matched by '*' and '?' 24 | patterns. 25 | 26 | """ 27 | return list(iglob(pathname)) 28 | 29 | def iglob(pathname): 30 | """Return an iterator which yields the paths matching a pathname pattern. 31 | 32 | The pattern may contain simple shell-style wildcards a la 33 | fnmatch. However, unlike fnmatch, filenames starting with a 34 | dot are special cases that are not matched by '*' and '?' 35 | patterns. 36 | 37 | """ 38 | dirname, basename = os.path.split(pathname) 39 | if not has_magic(pathname): 40 | if basename: 41 | if os.path.lexists(pathname): 42 | yield pathname 43 | else: 44 | # Patterns ending with a slash should match only directories 45 | if os.path.isdir(dirname): 46 | yield pathname 47 | return 48 | if not dirname: 49 | for name in glob1(os.curdir, basename): 50 | yield name 51 | return 52 | # `os.path.split()` returns the argument itself as a dirname if it is a 53 | # drive or UNC path. Prevent an infinite recursion if a drive or UNC path 54 | # contains magic characters (i.e. r'\\?\C:'). 55 | if dirname != pathname and has_magic(dirname): 56 | dirs = iglob(dirname) 57 | else: 58 | dirs = [dirname] 59 | if has_magic(basename): 60 | glob_in_dir = glob1 61 | else: 62 | glob_in_dir = glob0 63 | for dirname in dirs: 64 | for name in glob_in_dir(dirname, basename): 65 | yield os.path.join(dirname, name) 66 | 67 | # These 2 helper functions non-recursively glob inside a literal directory. 68 | # They return a list of basenames. `glob1` accepts a pattern while `glob0` 69 | # takes a literal basename (so it only has to check for its existence). 
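# Illustrative example of the dot-file rule implemented in glob1() below
# (the filenames here are assumptions for illustration only): in a directory
# containing 'a.lift', 'b.lift' and '.hidden.lift',
#
#     glob('*.lift')    ->  ['a.lift', 'b.lift']   (in os.listdir() order)
#     glob('.*.lift')   ->  ['.hidden.lift']
#
# because glob1() drops names starting with '.' unless the pattern itself
# starts with '.'.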
70 | 71 | def glob1(dirname, pattern): 72 | if not dirname: 73 | dirname = os.curdir 74 | if isinstance(pattern, _unicode) and not isinstance(dirname, str): 75 | dirname = str(dirname, sys.getfilesystemencoding() or 76 | sys.getdefaultencoding()) 77 | try: 78 | names = os.listdir(dirname) 79 | except os.error: 80 | return [] 81 | if pattern[0] != '.': 82 | names = [x for x in names if x[0] != '.'] 83 | return fnmatch.filter(names, pattern) 84 | 85 | def glob0(dirname, basename): 86 | if basename == '': 87 | # `os.path.split()` returns an empty basename for paths ending with a 88 | # directory separator. 'q*x/' should match only directories. 89 | if os.path.isdir(dirname): 90 | return [basename] 91 | else: 92 | if os.path.lexists(os.path.join(dirname, basename)): 93 | return [basename] 94 | return [] 95 | 96 | 97 | magic_check = re.compile('[*?[]') 98 | 99 | def has_magic(s): 100 | return magic_check.search(s) is not None 101 | -------------------------------------------------------------------------------- /syncxml/local_launch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A test module. Does some minimal loading. Outside of Anki, we can't do much, but we can at least parse the input file. 3 | Needs to be imported from a script somewhere outside this package, and the osdir set to this one. 4 | (See syncx_local .) 5 | 6 | @author: Jonathan Coombs, copyright sil.org 2012-2014 7 | ''' 8 | 9 | #Force Python 3 syntax 10 | 11 | 12 | from . import syncxml as SX 13 | from . import SyncFromXML as UI 14 | from . import logg as L 15 | import os 16 | 17 | def main(target=""): 18 | ''' Runs a full set of tests on the non-Anki side of things. ''' 19 | 20 | #If we're here, we're not running inside Anki, but we can still test/debug various pieces... 21 | L.init(SX.LOG_FP, L.VERBOSITY_DEBUG) 22 | L.debug('\nRUNNING LOCALLY!\n') 23 | 24 | try: 25 | # full test 26 | if UI.reconfigure(target): 27 | cfpath = SX.get_config_path() 28 | if not os.path.exists(cfpath): 29 | ##log error 30 | return 31 | 32 | ##L.debug('Reconfigure was successful.') # So we know it's a success 33 | UI.try_sync() 34 | ##L.debug('Done--success.') # So we know it's a success 35 | else: 36 | pass 37 | ##L.debug('Done--failed.') 38 | finally: 39 | UI.launch_paths_maybe() 40 | #L.close_file() 41 | 42 | -------------------------------------------------------------------------------- /syncxml/logg.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A simple logger. 3 | 4 | @author: Jonathan Coombs, copyright sil.org 2012-2014 5 | ''' 6 | 7 | #Force Python 3 syntax 8 | 9 | import os 10 | import platform 11 | 12 | VERBOSITY_SILENT = 0 13 | VERBOSITY_MINIMAL = 1 14 | VERBOSITY_NORMAL = 2 15 | VERBOSITY_DEBUG = 3 16 | VERBOSITY = VERBOSITY_DEBUG # Normally, choose VERBOSITY_NORMAL or VERBOSITY_DEBUG; VERBOSITY_SILENT might possibly help with unicode-related crashes. 
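# Usage sketch (illustrative; it mirrors the call pattern in local_launch.py
# above, and the message strings are made up for the example):
#
#     from . import logg as L
#     L.init(SX.LOG_FP, L.VERBOSITY_DEBUG)   # any writable file path works
#     L.debug('Parsing the input file...')
#     L.warn('Possible problem found')       # counted by warn_count()
#     L.error('Something went wrong')        # counted by error_count()
#     L.close_file()                         # release the log before asking the user to open it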
17 | 18 | # It's ok if client wants to set these differently, but remember it's a singleton 19 | LOG_PATH = '' 20 | LOG_FILE = 'logg.txt' 21 | 22 | __MAIN_LOG = None # Mimics a singleton by using the following module-level methods 23 | # Note that singletons aren't great, but ok for logging: http://googletesting.blogspot.com/2008/08/root-cause-of-singletons.html""" 24 | # Formerly I was doing something similar to this singleton: http://code.activestate.com/recipes/102263-logfilepy-a-singleton-log-file-creator/ 25 | 26 | def close_file(): 27 | ''' Call this before telling the user to check the log, so they can see it all. 28 | This releases the log file without blowing away its object's error count, etc. 29 | You cannot then write to the log again without first doing a reset() or init().''' 30 | if __MAIN_LOG: 31 | __MAIN_LOG.close_file() 32 | 33 | def init(fp=os.path.join(LOG_PATH, LOG_FILE), v=VERBOSITY): 34 | '''Must be called before you can start logging.''' 35 | global __MAIN_LOG 36 | close_file() 37 | __MAIN_LOG = Lg(fp, v) 38 | 39 | def reset(): 40 | init(__MAIN_LOG.path, __MAIN_LOG.verbosity) 41 | 42 | #def close(): #Not needed? Just do init? 43 | # """Basically de-instantiates the singleton (in case the user might run the calling code again in the same session)""" 44 | # release() 45 | # __MAIN_LOG = None 46 | 47 | def get_main_log(): return __MAIN_LOG 48 | 49 | # convenience methods: 50 | def error(msg): __MAIN_LOG.error(msg) 51 | def warn(msg): __MAIN_LOG.warn(msg) 52 | def w(msg): __MAIN_LOG.w(msg) 53 | def debug(msg): __MAIN_LOG.debug(msg) 54 | def error_count(): return __MAIN_LOG.error_count() 55 | def warn_count(): return __MAIN_LOG.warn_count() 56 | 57 | 58 | class Lg: 59 | '''A log class that both prints to stdout and writes to a log file. 60 | This is not a singleton, so it could be used, say, for a secondary log.''' 61 | 62 | def __init__(self,fp=os.path.join(LOG_PATH, LOG_FILE), v=VERBOSITY): 63 | self.path = fp 64 | self.verbosity = v 65 | self._num_errors, self._num_warnings = 0, 0 66 | 67 | self.f = open(fp, 'w+b') #will silently overwrite the log file 68 | self._w('Logging (verbosity level {}). Python interpreter is version {}.'.format(v, platform.python_version())) 69 | self._w('You are encouraged to search this log for "MSG:" labels to review errors and warnings.') 70 | self._w('This log is *overwritten* each time the addon runs, unless you make a copy.\n') 71 | 72 | def _w(self, msg, label = ''): 73 | ''' Write to the log. (Rewrite this method if we ever upgrade to Python 3.) ''' 74 | s = '{}{}\n'.format(label, msg) 75 | out = s.encode('utf-8') # don't crash the file output! 76 | self.f.write(out) 77 | s = s.encode('ASCII', 'replace') # don't crash the console! 
78 | print(s) 79 | 80 | def w(self, msg, label=''): 81 | if self.verbosity >= VERBOSITY_NORMAL: 82 | self._w(msg, label) 83 | 84 | def debug(self, msg): 85 | if self.verbosity >= VERBOSITY_DEBUG: 86 | self._w(msg) 87 | 88 | def warn(self, msg): 89 | self._num_warnings += 1 90 | if self.verbosity > VERBOSITY_SILENT: 91 | self._w(msg, 'WARNING MSG: ') 92 | 93 | def error(self, msg): 94 | self._num_errors += 1 95 | if self.verbosity > VERBOSITY_SILENT: 96 | self._w(msg, 'ERROR MSG: ') 97 | 98 | def close_file(self): 99 | if self.f: 100 | self.f.close() 101 | 102 | def error_count(self): return self._num_errors 103 | 104 | def warn_count(self): return self._num_warnings 105 | 106 | -------------------------------------------------------------------------------- /syncxml/samples/Lindu-Test.lift: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 |
This records the syllable pattern for a LexPronunciation in FieldWorks.
34 |
35 | 36 |
This records the tone information for a LexPronunciation in FieldWorks.
37 |
38 | 39 |
This records a comment (note) in a LexEtymology in FieldWorks.
40 |
41 | 42 |
This records residue left over from importing a standard format file into FieldWorks (or LinguaLinks).
43 |
44 | 45 |
This field is used to store a literal meaning of the entry. Typically, this field is necessary only for a compound or an idiom where the meaning of the whole is different from the sum of its parts.
46 |
47 | 48 |
A summary definition (located at the entry level in the Entry pane) is a general definition summarizing all the senses of a primary entry. It has no theoretical value; its use is solely pragmatic.
49 |
50 | 51 |
This field stores the scientific name pertinent to the current sense.
52 |
53 | 54 |
55 |
Class=LexEntry; Type=String; WsSelector=kwsVern
56 |
57 | 58 |
Y/N: Should the Pronunciation field be published? (accurate + unpredictable?) Also a holding place.
59 |
Class=LexEntry; Type=String; WsSelector=kwsAnal
60 |
61 | 62 |
Info regarding whether to publish this lexeme
63 |
Class=LexEntry; Type=String; WsSelector=kwsAnal
64 |
65 | 66 |
67 |
Class=LexSense; Type=MultiBigString; WsSelector=kwsAnalVerns
68 |
69 |
70 |
71 | 72 | 73 | 74 |
ata
75 |
76 | 77 | 78 |
ata
79 |
80 | 81 |
ata
82 |
83 | 84 |
anki
85 |
86 | 87 | 88 | 89 | roof 90 | atap 91 | 92 |
roof
93 |
atap
94 |
95 | 96 |
roof
97 |
98 |
atap
99 |
100 |
101 |
102 | 103 | 104 | 105 |
awinga
106 |
107 | 108 | 109 |
aβiŋa
110 |
111 | 112 |
awinga
113 |
114 | 115 |
anki
116 |
117 | 118 | 119 | 120 | shoulder 121 | bahu 122 | 123 |
shoulder
124 |
bahu
125 |
126 | 127 | 128 |
bahu
129 |
130 |
shoulder
131 |
132 |
133 |
134 | 135 | 136 | 137 |
koro
138 |
139 | 140 | 141 |
koɾo
142 |
143 | 144 |
koro
145 |
146 | 147 |
anki
148 |
149 | 150 | 151 | 152 | body 153 | tubuh 154 | 155 |
body
156 |
tubuh; badan
157 |
158 | 159 | 160 |
tubuh
161 |
162 |
body
163 |
164 |
badan
165 |
166 |
167 |
168 | 169 | 170 | 171 |
lori
172 |
173 | 174 | 175 |
loɾi
176 |
177 | 178 |
anki
179 |
180 | 181 | 182 | 183 | bald 184 | gundul 185 | 186 |
buzz cut; bald
187 |
cukur rata; gundul
188 |
189 |
gundul
190 |
191 |
buzz cut
192 |
193 |
bald
194 |
195 |
196 |
197 | 198 | 199 | 200 |
nagali
201 |
634710572710000000nagali.wav
202 |
203 | 204 | 205 | 206 | 207 | 208 | 209 |
nagaɽi
210 |
211 | 212 |
na-~ gali
213 |
214 | 215 |
anki
216 |
217 | 218 | 219 | 220 | clean 221 | bersih 222 | 223 |
clean
224 |
bersih
225 |
226 | 227 |
Raelasi sampe magali.
228 |
634710573956406250A LexExampleSentence.wav
229 | 230 |
Dicuci sampai bersih.
231 |
232 |
233 | 234 | 235 |
bersih
236 |
237 |
clean
238 |
239 | 240 | 246 |
247 |
248 |
249 | -------------------------------------------------------------------------------- /syncxml/samples/audio/634710572710000000nagali.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/audio/634710572710000000nagali.wav -------------------------------------------------------------------------------- /syncxml/samples/audio/634710573956406250A LexExampleSentence.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/audio/634710573956406250A LexExampleSentence.wav -------------------------------------------------------------------------------- /syncxml/samples/indonesian-lift-dictionary.apkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/indonesian-lift-dictionary.apkg -------------------------------------------------------------------------------- /syncxml/samples/lift-dictionary.apkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/lift-dictionary.apkg -------------------------------------------------------------------------------- /syncxml/samples/pictures/bersih.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/pictures/bersih.gif -------------------------------------------------------------------------------- /tests/Moma/Moma.WeSayConfig: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | LexEntry 9 | MultiText 10 | Word 11 | True 12 | EntryLexicalForm 13 | False 14 | False 15 | ZeroOr1 16 | 17 | Visible 18 | 19 | myl 20 | myl-Zxxx-x-audio 21 | 22 | 23 | 24 | LexEntry 25 | MultiText 26 | Citation Form 27 | False 28 | citation 29 | False 30 | False 31 | ZeroOr1 32 | NormallyHidden 33 | 34 | myl 35 | 36 | 37 | 38 | LexSense 39 | MultiText 40 | Definition (Meaning) 41 | True 42 | definition 43 | False 44 | True 45 | ZeroOr1 46 | Visible 47 | 48 | en 49 | en-Zxxx-x-audio 50 | es 51 | es-Zxxx-x-audio 52 | 53 | 54 | 55 | LexSense 56 | MultiText 57 | Gloss 58 | False 59 | gloss 60 | False 61 | True 62 | ZeroOr1 63 | NormallyHidden 64 | 65 | en 66 | en-Zxxx-x-audio 67 | es 68 | es-Zxxx-x-audio 69 | 70 | 71 | 72 | LexEntry 73 | MultiText 74 | Literal Meaning 75 | False 76 | literal-meaning 77 | False 78 | True 79 | ZeroOr1 80 | NormallyHidden 81 | 82 | en-Zxxx-x-audio 83 | 84 | 85 | 86 | PalasoDataObject 87 | MultiText 88 | Note 89 | False 90 | note 91 | True 92 | True 93 | ZeroOr1 94 | NormallyHidden 95 | 96 | en 97 | 98 | 99 | 100 | LexSense 101 | Picture 102 | Picture 103 | True 104 | Picture 105 | False 106 | False 107 | ZeroOr1 108 | NormallyHidden 109 | 110 | en-Zxxx-x-audio 111 | 112 | 113 | 114 | LexSense 115 | Option 116 | PartOfSpeech 117 | True 118 | POS 119 | False 120 | False 121 | ZeroOr1 122 | PartsOfSpeech.xml 123 | Visible 124 | 125 | en 126 | 127 | 128 | 129 | LexSense 130 | MultiText 131 | SILCAWL 132 | False 133 | SILCAWL 134 | True 135 | False 136 | ZeroOr1 137 | NormallyHidden 138 | 139 | en-Zxxx-x-audio 140 | 
141 | 142 | 143 | LexExampleSentence 144 | MultiText 145 | Example Sentence 146 | True 147 | ExampleSentence 148 | False 149 | True 150 | ZeroOr1 151 | 152 | Visible 153 | 154 | myl 155 | myl-Zxxx-x-audio 156 | 157 | 158 | 159 | LexExampleSentence 160 | MultiText 161 | Example Translation 162 | True 163 | ExampleTranslation 164 | False 165 | True 166 | ZeroOr1 167 | Visible 168 | 169 | en 170 | en-Zxxx-x-audio 171 | es 172 | es-Zxxx-x-audio 173 | 174 | 175 | 176 | LexSense 177 | OptionCollection 178 | Sem Dom 179 | True 180 | semantic-domain-ddp4 181 | False 182 | False 183 | ZeroOr1 184 | Ddp4.xml 185 | NormallyHidden 186 | 187 | en 188 | 189 | 190 | 191 | LexEntry 192 | RelationToOneEntry 193 | Base Form 194 | False 195 | BaseForm 196 | False 197 | False 198 | ZeroOr1 199 | NormallyHidden 200 | 201 | myl 202 | 203 | 204 | 205 | LexEntry 206 | RelationToOneEntry 207 | Cross Reference 208 | False 209 | confer 210 | False 211 | False 212 | ZeroOrMore 213 | NormallyHidden 214 | 215 | myl 216 | 217 | 218 | 219 | Default View Template 220 | 221 | 222 | 223 | 226 | 229 | 232 | 233 | Add Meanings 234 | Add meanings (senses) to entries where they are missing. 235 | definition 236 | definition 237 | semantic-domain-ddp4 238 | 239 | 240 | 241 | 244 | 245 | Add Parts of Speech 246 | Add parts of speech to senses where they are missing. 247 | POS 248 | POS 249 | definition, ExampleSentence 250 | 251 | 252 | 253 | 256 | 257 | Add Example Sentences 258 | Add example sentences to senses where they are missing. 259 | ExampleSentence 260 | ExampleSentence 261 | definition 262 | 263 | 264 | 265 | 268 | 269 | Add Base Forms 270 | Identify the "base form" word that this word is built from. In the printed dictionary, the derived or variant words can optionally be shown as subentries of their base forms. 271 | BaseForm 272 | BaseForm 273 | 274 | 275 | 276 | 277 | 280 | 283 | 286 | SILCAWL 287 | en 288 | 289 | 292 | SILCAWL-MozambiqueAddendum 293 | en 294 | 295 | 298 | Ddp4Questions-en.xml 299 | False 300 | 301 | 302 | 303 | 306 | 307 | -------------------------------------------------------------------------------- /tests/Moma/Moma.lift: -------------------------------------------------------------------------------- 1 |  2 | 5 | 10 | 11 |
13 | loka 14 |
15 |
17 | adat-63531587437330.wav 18 |
19 |
20 | 22 | 24 | 25 |
27 | banana 28 |
29 |
31 | platano 32 |
33 |
35 | loka-63531587541460.wav 36 |
37 |
38 | 40 |
41 |
42 | 47 | 48 |
50 | mata 51 |
52 |
54 | mata-63531587590792.wav 55 |
56 |
57 | 59 | 61 | 62 |
64 | eye 65 |
66 |
68 | ojo 69 |
70 |
72 | mata-63531587597879.wav 73 |
74 |
75 | 77 |
78 |
79 |
-------------------------------------------------------------------------------- /tests/Moma/audio/adat-63531587437330.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/adat-63531587437330.wav -------------------------------------------------------------------------------- /tests/Moma/audio/loka-63531587541460.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/loka-63531587541460.wav -------------------------------------------------------------------------------- /tests/Moma/audio/loka-63531587545733.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/loka-63531587545733.wav -------------------------------------------------------------------------------- /tests/Moma/audio/mata-63531587590792.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/mata-63531587590792.wav -------------------------------------------------------------------------------- /tests/Moma/audio/mata-63531587597879.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/mata-63531587597879.wav -------------------------------------------------------------------------------- /tests/Moma/pictures/cultural law.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/pictures/cultural law.png -------------------------------------------------------------------------------- /tests/Moma/pictures/sdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/pictures/sdd.png -------------------------------------------------------------------------------- /this-is-an-eclipse-pydev-project.txt: -------------------------------------------------------------------------------- 1 | I.e. an Eclipse project (.project) that is specifically a Pydev project (.pydevproject). -------------------------------------------------------------------------------- /xml/__init__.py: -------------------------------------------------------------------------------- 1 | """Core XML support for Python. 2 | 3 | This package contains four sub-packages: 4 | 5 | dom -- The W3C Document Object Model. This supports DOM Level 1 + 6 | Namespaces. 7 | 8 | parsers -- Python wrappers for XML parsers (currently only supports Expat). 9 | 10 | sax -- The Simple API for XML, developed by XML-Dev, led by David 11 | Megginson and ported to Python by Lars Marius Garshol. This 12 | supports the SAX 2 API. 13 | 14 | etree -- The ElementTree XML library. This is a subset of the full 15 | ElementTree XML release. 
16 | 17 | """ 18 | 19 | 20 | __all__ = ["dom", "parsers", "sax", "etree"] 21 | -------------------------------------------------------------------------------- /xml/dom/NodeFilter.py: -------------------------------------------------------------------------------- 1 | # This is the Python mapping for interface NodeFilter from 2 | # DOM2-Traversal-Range. It contains only constants. 3 | 4 | class NodeFilter: 5 | """ 6 | This is the DOM2 NodeFilter interface. It contains only constants. 7 | """ 8 | FILTER_ACCEPT = 1 9 | FILTER_REJECT = 2 10 | FILTER_SKIP = 3 11 | 12 | SHOW_ALL = 0xFFFFFFFF 13 | SHOW_ELEMENT = 0x00000001 14 | SHOW_ATTRIBUTE = 0x00000002 15 | SHOW_TEXT = 0x00000004 16 | SHOW_CDATA_SECTION = 0x00000008 17 | SHOW_ENTITY_REFERENCE = 0x00000010 18 | SHOW_ENTITY = 0x00000020 19 | SHOW_PROCESSING_INSTRUCTION = 0x00000040 20 | SHOW_COMMENT = 0x00000080 21 | SHOW_DOCUMENT = 0x00000100 22 | SHOW_DOCUMENT_TYPE = 0x00000200 23 | SHOW_DOCUMENT_FRAGMENT = 0x00000400 24 | SHOW_NOTATION = 0x00000800 25 | 26 | def acceptNode(self, node): 27 | raise NotImplementedError 28 | -------------------------------------------------------------------------------- /xml/dom/__init__.py: -------------------------------------------------------------------------------- 1 | """W3C Document Object Model implementation for Python. 2 | 3 | The Python mapping of the Document Object Model is documented in the 4 | Python Library Reference in the section on the xml.dom package. 5 | 6 | This package contains the following modules: 7 | 8 | minidom -- A simple implementation of the Level 1 DOM with namespace 9 | support added (based on the Level 2 specification) and other 10 | minor Level 2 functionality. 11 | 12 | pulldom -- DOM builder supporting on-demand tree-building for selected 13 | subtrees of the document. 14 | 15 | """ 16 | 17 | 18 | class Node: 19 | """Class giving the NodeType constants.""" 20 | __slots__ = () 21 | 22 | # DOM implementations may use this as a base class for their own 23 | # Node implementations. If they don't, the constants defined here 24 | # should still be used as the canonical definitions as they match 25 | # the values given in the W3C recommendation. Client code can 26 | # safely refer to these values in all tests of Node.nodeType 27 | # values. 28 | 29 | ELEMENT_NODE = 1 30 | ATTRIBUTE_NODE = 2 31 | TEXT_NODE = 3 32 | CDATA_SECTION_NODE = 4 33 | ENTITY_REFERENCE_NODE = 5 34 | ENTITY_NODE = 6 35 | PROCESSING_INSTRUCTION_NODE = 7 36 | COMMENT_NODE = 8 37 | DOCUMENT_NODE = 9 38 | DOCUMENT_TYPE_NODE = 10 39 | DOCUMENT_FRAGMENT_NODE = 11 40 | NOTATION_NODE = 12 41 | 42 | 43 | #ExceptionCode 44 | INDEX_SIZE_ERR = 1 45 | DOMSTRING_SIZE_ERR = 2 46 | HIERARCHY_REQUEST_ERR = 3 47 | WRONG_DOCUMENT_ERR = 4 48 | INVALID_CHARACTER_ERR = 5 49 | NO_DATA_ALLOWED_ERR = 6 50 | NO_MODIFICATION_ALLOWED_ERR = 7 51 | NOT_FOUND_ERR = 8 52 | NOT_SUPPORTED_ERR = 9 53 | INUSE_ATTRIBUTE_ERR = 10 54 | INVALID_STATE_ERR = 11 55 | SYNTAX_ERR = 12 56 | INVALID_MODIFICATION_ERR = 13 57 | NAMESPACE_ERR = 14 58 | INVALID_ACCESS_ERR = 15 59 | VALIDATION_ERR = 16 60 | 61 | 62 | class DOMException(Exception): 63 | """Abstract base class for DOM exceptions. 
64 | Exceptions with specific codes are specializations of this class.""" 65 | 66 | def __init__(self, *args, **kw): 67 | if self.__class__ is DOMException: 68 | raise RuntimeError( 69 | "DOMException should not be instantiated directly") 70 | Exception.__init__(self, *args, **kw) 71 | 72 | def _get_code(self): 73 | return self.code 74 | 75 | 76 | class IndexSizeErr(DOMException): 77 | code = INDEX_SIZE_ERR 78 | 79 | class DomstringSizeErr(DOMException): 80 | code = DOMSTRING_SIZE_ERR 81 | 82 | class HierarchyRequestErr(DOMException): 83 | code = HIERARCHY_REQUEST_ERR 84 | 85 | class WrongDocumentErr(DOMException): 86 | code = WRONG_DOCUMENT_ERR 87 | 88 | class InvalidCharacterErr(DOMException): 89 | code = INVALID_CHARACTER_ERR 90 | 91 | class NoDataAllowedErr(DOMException): 92 | code = NO_DATA_ALLOWED_ERR 93 | 94 | class NoModificationAllowedErr(DOMException): 95 | code = NO_MODIFICATION_ALLOWED_ERR 96 | 97 | class NotFoundErr(DOMException): 98 | code = NOT_FOUND_ERR 99 | 100 | class NotSupportedErr(DOMException): 101 | code = NOT_SUPPORTED_ERR 102 | 103 | class InuseAttributeErr(DOMException): 104 | code = INUSE_ATTRIBUTE_ERR 105 | 106 | class InvalidStateErr(DOMException): 107 | code = INVALID_STATE_ERR 108 | 109 | class SyntaxErr(DOMException): 110 | code = SYNTAX_ERR 111 | 112 | class InvalidModificationErr(DOMException): 113 | code = INVALID_MODIFICATION_ERR 114 | 115 | class NamespaceErr(DOMException): 116 | code = NAMESPACE_ERR 117 | 118 | class InvalidAccessErr(DOMException): 119 | code = INVALID_ACCESS_ERR 120 | 121 | class ValidationErr(DOMException): 122 | code = VALIDATION_ERR 123 | 124 | class UserDataHandler: 125 | """Class giving the operation constants for UserDataHandler.handle().""" 126 | 127 | # Based on DOM Level 3 (WD 9 April 2002) 128 | 129 | NODE_CLONED = 1 130 | NODE_IMPORTED = 2 131 | NODE_DELETED = 3 132 | NODE_RENAMED = 4 133 | 134 | XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" 135 | XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" 136 | XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" 137 | EMPTY_NAMESPACE = None 138 | EMPTY_PREFIX = None 139 | 140 | from .domreg import getDOMImplementation, registerDOMImplementation 141 | -------------------------------------------------------------------------------- /xml/dom/domreg.py: -------------------------------------------------------------------------------- 1 | """Registration facilities for DOM. This module should not be used 2 | directly. Instead, the functions getDOMImplementation and 3 | registerDOMImplementation should be imported from xml.dom.""" 4 | 5 | # This is a list of well-known implementations. Well-known names 6 | # should be published by posting to xml-sig@python.org, and are 7 | # subsequently recorded in this file. 8 | 9 | import sys 10 | 11 | well_known_implementations = { 12 | 'minidom':'xml.dom.minidom', 13 | '4DOM': 'xml.dom.DOMImplementation', 14 | } 15 | 16 | # DOM implementations not officially registered should register 17 | # themselves with their 18 | 19 | registered = {} 20 | 21 | def registerDOMImplementation(name, factory): 22 | """registerDOMImplementation(name, factory) 23 | 24 | Register the factory function with the name. The factory function 25 | should return an object which implements the DOMImplementation 26 | interface. The factory function can either return the same object, 27 | or a new one (e.g. 
if that implementation supports some 28 | customization).""" 29 | 30 | registered[name] = factory 31 | 32 | def _good_enough(dom, features): 33 | "_good_enough(dom, features) -> Return 1 if the dom offers the features" 34 | for f,v in features: 35 | if not dom.hasFeature(f,v): 36 | return 0 37 | return 1 38 | 39 | def getDOMImplementation(name=None, features=()): 40 | """getDOMImplementation(name = None, features = ()) -> DOM implementation. 41 | 42 | Return a suitable DOM implementation. The name is either 43 | well-known, the module name of a DOM implementation, or None. If 44 | it is not None, imports the corresponding module and returns 45 | DOMImplementation object if the import succeeds. 46 | 47 | If name is not given, consider the available implementations to 48 | find one with the required feature set. If no implementation can 49 | be found, raise an ImportError. The features list must be a sequence 50 | of (feature, version) pairs which are passed to hasFeature.""" 51 | 52 | import os 53 | creator = None 54 | mod = well_known_implementations.get(name) 55 | if mod: 56 | mod = __import__(mod, {}, {}, ['getDOMImplementation']) 57 | return mod.getDOMImplementation() 58 | elif name: 59 | return registered[name]() 60 | elif not sys.flags.ignore_environment and "PYTHON_DOM" in os.environ: 61 | return getDOMImplementation(name = os.environ["PYTHON_DOM"]) 62 | 63 | # User did not specify a name, try implementations in arbitrary 64 | # order, returning the one that has the required features 65 | if isinstance(features, str): 66 | features = _parse_feature_string(features) 67 | for creator in registered.values(): 68 | dom = creator() 69 | if _good_enough(dom, features): 70 | return dom 71 | 72 | for creator in well_known_implementations.keys(): 73 | try: 74 | dom = getDOMImplementation(name = creator) 75 | except Exception: # typically ImportError, or AttributeError 76 | continue 77 | if _good_enough(dom, features): 78 | return dom 79 | 80 | raise ImportError("no suitable DOM implementation found") 81 | 82 | def _parse_feature_string(s): 83 | features = [] 84 | parts = s.split() 85 | i = 0 86 | length = len(parts) 87 | while i < length: 88 | feature = parts[i] 89 | if feature[0] in "0123456789": 90 | raise ValueError("bad feature name: %r" % (feature,)) 91 | i = i + 1 92 | version = None 93 | if i < length: 94 | v = parts[i] 95 | if v[0] in "0123456789": 96 | i = i + 1 97 | version = v 98 | features.append((feature, version)) 99 | return tuple(features) 100 | -------------------------------------------------------------------------------- /xml/dom/minicompat.py: -------------------------------------------------------------------------------- 1 | """Python version compatibility support for minidom. 2 | 3 | This module contains internal implementation details and 4 | should not be imported; use xml.dom.minidom instead. 5 | """ 6 | 7 | # This module should only be imported using "import *". 8 | # 9 | # The following names are defined: 10 | # 11 | # NodeList -- lightest possible NodeList implementation 12 | # 13 | # EmptyNodeList -- lightest possible NodeList that is guaranteed to 14 | # remain empty (immutable) 15 | # 16 | # StringTypes -- tuple of defined string types 17 | # 18 | # defproperty -- function used in conjunction with GetattrMagic; 19 | # using these together is needed to make them work 20 | # as efficiently as possible in both Python 2.2+ 21 | # and older versions. 
For example: 22 | # 23 | # class MyClass(GetattrMagic): 24 | # def _get_myattr(self): 25 | # return something 26 | # 27 | # defproperty(MyClass, "myattr", 28 | # "return some value") 29 | # 30 | # For Python 2.2 and newer, this will construct a 31 | # property object on the class, which avoids 32 | # needing to override __getattr__(). It will only 33 | # work for read-only attributes. 34 | # 35 | # For older versions of Python, inheriting from 36 | # GetattrMagic will use the traditional 37 | # __getattr__() hackery to achieve the same effect, 38 | # but less efficiently. 39 | # 40 | # defproperty() should be used for each version of 41 | # the relevant _get_() function. 42 | 43 | __all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"] 44 | 45 | import xml.dom 46 | 47 | StringTypes = (str,) 48 | 49 | 50 | class NodeList(list): 51 | __slots__ = () 52 | 53 | def item(self, index): 54 | if 0 <= index < len(self): 55 | return self[index] 56 | 57 | def _get_length(self): 58 | return len(self) 59 | 60 | def _set_length(self, value): 61 | raise xml.dom.NoModificationAllowedErr( 62 | "attempt to modify read-only attribute 'length'") 63 | 64 | length = property(_get_length, _set_length, 65 | doc="The number of nodes in the NodeList.") 66 | 67 | # For backward compatibility 68 | def __setstate__(self, state): 69 | if state is None: 70 | state = [] 71 | self[:] = state 72 | 73 | 74 | class EmptyNodeList(tuple): 75 | __slots__ = () 76 | 77 | def __add__(self, other): 78 | NL = NodeList() 79 | NL.extend(other) 80 | return NL 81 | 82 | def __radd__(self, other): 83 | NL = NodeList() 84 | NL.extend(other) 85 | return NL 86 | 87 | def item(self, index): 88 | return None 89 | 90 | def _get_length(self): 91 | return 0 92 | 93 | def _set_length(self, value): 94 | raise xml.dom.NoModificationAllowedErr( 95 | "attempt to modify read-only attribute 'length'") 96 | 97 | length = property(_get_length, _set_length, 98 | doc="The number of nodes in the NodeList.") 99 | 100 | 101 | def defproperty(klass, name, doc): 102 | get = getattr(klass, ("_get_" + name)) 103 | def set(self, value, name=name): 104 | raise xml.dom.NoModificationAllowedErr( 105 | "attempt to modify read-only attribute " + repr(name)) 106 | assert not hasattr(klass, "_set_" + name), \ 107 | "expected not to find _set_" + name 108 | prop = property(get, set, doc=doc) 109 | setattr(klass, name, prop) 110 | -------------------------------------------------------------------------------- /xml/dom/pulldom.py: -------------------------------------------------------------------------------- 1 | import xml.sax 2 | import xml.sax.handler 3 | 4 | START_ELEMENT = "START_ELEMENT" 5 | END_ELEMENT = "END_ELEMENT" 6 | COMMENT = "COMMENT" 7 | START_DOCUMENT = "START_DOCUMENT" 8 | END_DOCUMENT = "END_DOCUMENT" 9 | PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION" 10 | IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" 11 | CHARACTERS = "CHARACTERS" 12 | 13 | class PullDOM(xml.sax.ContentHandler): 14 | _locator = None 15 | document = None 16 | 17 | def __init__(self, documentFactory=None): 18 | from xml.dom import XML_NAMESPACE 19 | self.documentFactory = documentFactory 20 | self.firstEvent = [None, None] 21 | self.lastEvent = self.firstEvent 22 | self.elementStack = [] 23 | self.push = self.elementStack.append 24 | try: 25 | self.pop = self.elementStack.pop 26 | except AttributeError: 27 | # use class' pop instead 28 | pass 29 | self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts 30 | self._current_context = self._ns_contexts[-1] 
31 | self.pending_events = [] 32 | 33 | def pop(self): 34 | result = self.elementStack[-1] 35 | del self.elementStack[-1] 36 | return result 37 | 38 | def setDocumentLocator(self, locator): 39 | self._locator = locator 40 | 41 | def startPrefixMapping(self, prefix, uri): 42 | if not hasattr(self, '_xmlns_attrs'): 43 | self._xmlns_attrs = [] 44 | self._xmlns_attrs.append((prefix or 'xmlns', uri)) 45 | self._ns_contexts.append(self._current_context.copy()) 46 | self._current_context[uri] = prefix or None 47 | 48 | def endPrefixMapping(self, prefix): 49 | self._current_context = self._ns_contexts.pop() 50 | 51 | def startElementNS(self, name, tagName , attrs): 52 | # Retrieve xml namespace declaration attributes. 53 | xmlns_uri = 'http://www.w3.org/2000/xmlns/' 54 | xmlns_attrs = getattr(self, '_xmlns_attrs', None) 55 | if xmlns_attrs is not None: 56 | for aname, value in xmlns_attrs: 57 | attrs._attrs[(xmlns_uri, aname)] = value 58 | self._xmlns_attrs = [] 59 | uri, localname = name 60 | if uri: 61 | # When using namespaces, the reader may or may not 62 | # provide us with the original name. If not, create 63 | # *a* valid tagName from the current context. 64 | if tagName is None: 65 | prefix = self._current_context[uri] 66 | if prefix: 67 | tagName = prefix + ":" + localname 68 | else: 69 | tagName = localname 70 | if self.document: 71 | node = self.document.createElementNS(uri, tagName) 72 | else: 73 | node = self.buildDocument(uri, tagName) 74 | else: 75 | # When the tagname is not prefixed, it just appears as 76 | # localname 77 | if self.document: 78 | node = self.document.createElement(localname) 79 | else: 80 | node = self.buildDocument(None, localname) 81 | 82 | for aname,value in attrs.items(): 83 | a_uri, a_localname = aname 84 | if a_uri == xmlns_uri: 85 | if a_localname == 'xmlns': 86 | qname = a_localname 87 | else: 88 | qname = 'xmlns:' + a_localname 89 | attr = self.document.createAttributeNS(a_uri, qname) 90 | node.setAttributeNodeNS(attr) 91 | elif a_uri: 92 | prefix = self._current_context[a_uri] 93 | if prefix: 94 | qname = prefix + ":" + a_localname 95 | else: 96 | qname = a_localname 97 | attr = self.document.createAttributeNS(a_uri, qname) 98 | node.setAttributeNodeNS(attr) 99 | else: 100 | attr = self.document.createAttribute(a_localname) 101 | node.setAttributeNode(attr) 102 | attr.value = value 103 | 104 | self.lastEvent[1] = [(START_ELEMENT, node), None] 105 | self.lastEvent = self.lastEvent[1] 106 | self.push(node) 107 | 108 | def endElementNS(self, name, tagName): 109 | self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] 110 | self.lastEvent = self.lastEvent[1] 111 | 112 | def startElement(self, name, attrs): 113 | if self.document: 114 | node = self.document.createElement(name) 115 | else: 116 | node = self.buildDocument(None, name) 117 | 118 | for aname,value in attrs.items(): 119 | attr = self.document.createAttribute(aname) 120 | attr.value = value 121 | node.setAttributeNode(attr) 122 | 123 | self.lastEvent[1] = [(START_ELEMENT, node), None] 124 | self.lastEvent = self.lastEvent[1] 125 | self.push(node) 126 | 127 | def endElement(self, name): 128 | self.lastEvent[1] = [(END_ELEMENT, self.pop()), None] 129 | self.lastEvent = self.lastEvent[1] 130 | 131 | def comment(self, s): 132 | if self.document: 133 | node = self.document.createComment(s) 134 | self.lastEvent[1] = [(COMMENT, node), None] 135 | self.lastEvent = self.lastEvent[1] 136 | else: 137 | event = [(COMMENT, s), None] 138 | self.pending_events.append(event) 139 | 140 | def 
processingInstruction(self, target, data): 141 | if self.document: 142 | node = self.document.createProcessingInstruction(target, data) 143 | self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None] 144 | self.lastEvent = self.lastEvent[1] 145 | else: 146 | event = [(PROCESSING_INSTRUCTION, target, data), None] 147 | self.pending_events.append(event) 148 | 149 | def ignorableWhitespace(self, chars): 150 | node = self.document.createTextNode(chars) 151 | self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] 152 | self.lastEvent = self.lastEvent[1] 153 | 154 | def characters(self, chars): 155 | node = self.document.createTextNode(chars) 156 | self.lastEvent[1] = [(CHARACTERS, node), None] 157 | self.lastEvent = self.lastEvent[1] 158 | 159 | def startDocument(self): 160 | if self.documentFactory is None: 161 | import xml.dom.minidom 162 | self.documentFactory = xml.dom.minidom.Document.implementation 163 | 164 | def buildDocument(self, uri, tagname): 165 | # Can't do that in startDocument, since we need the tagname 166 | # XXX: obtain DocumentType 167 | node = self.documentFactory.createDocument(uri, tagname, None) 168 | self.document = node 169 | self.lastEvent[1] = [(START_DOCUMENT, node), None] 170 | self.lastEvent = self.lastEvent[1] 171 | self.push(node) 172 | # Put everything we have seen so far into the document 173 | for e in self.pending_events: 174 | if e[0][0] == PROCESSING_INSTRUCTION: 175 | _,target,data = e[0] 176 | n = self.document.createProcessingInstruction(target, data) 177 | e[0] = (PROCESSING_INSTRUCTION, n) 178 | elif e[0][0] == COMMENT: 179 | n = self.document.createComment(e[0][1]) 180 | e[0] = (COMMENT, n) 181 | else: 182 | raise AssertionError("Unknown pending event ",e[0][0]) 183 | self.lastEvent[1] = e 184 | self.lastEvent = e 185 | self.pending_events = None 186 | return node.firstChild 187 | 188 | def endDocument(self): 189 | self.lastEvent[1] = [(END_DOCUMENT, self.document), None] 190 | self.pop() 191 | 192 | def clear(self): 193 | "clear(): Explicitly release parsing structures" 194 | self.document = None 195 | 196 | class ErrorHandler: 197 | def warning(self, exception): 198 | print(exception) 199 | def error(self, exception): 200 | raise exception 201 | def fatalError(self, exception): 202 | raise exception 203 | 204 | class DOMEventStream: 205 | def __init__(self, stream, parser, bufsize): 206 | self.stream = stream 207 | self.parser = parser 208 | self.bufsize = bufsize 209 | if not hasattr(self.parser, 'feed'): 210 | self.getEvent = self._slurp 211 | self.reset() 212 | 213 | def reset(self): 214 | self.pulldom = PullDOM() 215 | # This content handler relies on namespace support 216 | self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) 217 | self.parser.setContentHandler(self.pulldom) 218 | 219 | def __getitem__(self, pos): 220 | import warnings 221 | warnings.warn( 222 | "DOMEventStream's __getitem__ method ignores 'pos' parameter. 
" 223 | "Use iterator protocol instead.", 224 | DeprecationWarning, 225 | stacklevel=2 226 | ) 227 | rc = self.getEvent() 228 | if rc: 229 | return rc 230 | raise IndexError 231 | 232 | def __next__(self): 233 | rc = self.getEvent() 234 | if rc: 235 | return rc 236 | raise StopIteration 237 | 238 | def __iter__(self): 239 | return self 240 | 241 | def expandNode(self, node): 242 | event = self.getEvent() 243 | parents = [node] 244 | while event: 245 | token, cur_node = event 246 | if cur_node is node: 247 | return 248 | if token != END_ELEMENT: 249 | parents[-1].appendChild(cur_node) 250 | if token == START_ELEMENT: 251 | parents.append(cur_node) 252 | elif token == END_ELEMENT: 253 | del parents[-1] 254 | event = self.getEvent() 255 | 256 | def getEvent(self): 257 | # use IncrementalParser interface, so we get the desired 258 | # pull effect 259 | if not self.pulldom.firstEvent[1]: 260 | self.pulldom.lastEvent = self.pulldom.firstEvent 261 | while not self.pulldom.firstEvent[1]: 262 | buf = self.stream.read(self.bufsize) 263 | if not buf: 264 | self.parser.close() 265 | return None 266 | self.parser.feed(buf) 267 | rc = self.pulldom.firstEvent[1][0] 268 | self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] 269 | return rc 270 | 271 | def _slurp(self): 272 | """ Fallback replacement for getEvent() using the 273 | standard SAX2 interface, which means we slurp the 274 | SAX events into memory (no performance gain, but 275 | we are compatible to all SAX parsers). 276 | """ 277 | self.parser.parse(self.stream) 278 | self.getEvent = self._emit 279 | return self._emit() 280 | 281 | def _emit(self): 282 | """ Fallback replacement for getEvent() that emits 283 | the events that _slurp() read previously. 284 | """ 285 | rc = self.pulldom.firstEvent[1][0] 286 | self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1] 287 | return rc 288 | 289 | def clear(self): 290 | """clear(): Explicitly release parsing objects""" 291 | self.pulldom.clear() 292 | del self.pulldom 293 | self.parser = None 294 | self.stream = None 295 | 296 | class SAX2DOM(PullDOM): 297 | 298 | def startElementNS(self, name, tagName , attrs): 299 | PullDOM.startElementNS(self, name, tagName, attrs) 300 | curNode = self.elementStack[-1] 301 | parentNode = self.elementStack[-2] 302 | parentNode.appendChild(curNode) 303 | 304 | def startElement(self, name, attrs): 305 | PullDOM.startElement(self, name, attrs) 306 | curNode = self.elementStack[-1] 307 | parentNode = self.elementStack[-2] 308 | parentNode.appendChild(curNode) 309 | 310 | def processingInstruction(self, target, data): 311 | PullDOM.processingInstruction(self, target, data) 312 | node = self.lastEvent[0][1] 313 | parentNode = self.elementStack[-1] 314 | parentNode.appendChild(node) 315 | 316 | def ignorableWhitespace(self, chars): 317 | PullDOM.ignorableWhitespace(self, chars) 318 | node = self.lastEvent[0][1] 319 | parentNode = self.elementStack[-1] 320 | parentNode.appendChild(node) 321 | 322 | def characters(self, chars): 323 | PullDOM.characters(self, chars) 324 | node = self.lastEvent[0][1] 325 | parentNode = self.elementStack[-1] 326 | parentNode.appendChild(node) 327 | 328 | 329 | default_bufsize = (2 ** 14) - 20 330 | 331 | def parse(stream_or_string, parser=None, bufsize=None): 332 | if bufsize is None: 333 | bufsize = default_bufsize 334 | if isinstance(stream_or_string, str): 335 | stream = open(stream_or_string, 'rb') 336 | else: 337 | stream = stream_or_string 338 | if not parser: 339 | parser = xml.sax.make_parser() 340 | return 
DOMEventStream(stream, parser, bufsize) 341 | 342 | def parseString(string, parser=None): 343 | from io import StringIO 344 | 345 | bufsize = len(string) 346 | buf = StringIO(string) 347 | if not parser: 348 | parser = xml.sax.make_parser() 349 | return DOMEventStream(buf, parser, bufsize) 350 | -------------------------------------------------------------------------------- /xml/dom/xmlbuilder.py: -------------------------------------------------------------------------------- 1 | """Implementation of the DOM Level 3 'LS-Load' feature.""" 2 | 3 | import copy 4 | import warnings 5 | import xml.dom 6 | 7 | from xml.dom.NodeFilter import NodeFilter 8 | 9 | 10 | __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"] 11 | 12 | 13 | class Options: 14 | """Features object that has variables set for each DOMBuilder feature. 15 | 16 | The DOMBuilder class uses an instance of this class to pass settings to 17 | the ExpatBuilder class. 18 | """ 19 | 20 | # Note that the DOMBuilder class in LoadSave constrains which of these 21 | # values can be set using the DOM Level 3 LoadSave feature. 22 | 23 | namespaces = 1 24 | namespace_declarations = True 25 | validation = False 26 | external_parameter_entities = True 27 | external_general_entities = True 28 | external_dtd_subset = True 29 | validate_if_schema = False 30 | validate = False 31 | datatype_normalization = False 32 | create_entity_ref_nodes = True 33 | entities = True 34 | whitespace_in_element_content = True 35 | cdata_sections = True 36 | comments = True 37 | charset_overrides_xml_encoding = True 38 | infoset = False 39 | supported_mediatypes_only = False 40 | 41 | errorHandler = None 42 | filter = None 43 | 44 | 45 | class DOMBuilder: 46 | entityResolver = None 47 | errorHandler = None 48 | filter = None 49 | 50 | ACTION_REPLACE = 1 51 | ACTION_APPEND_AS_CHILDREN = 2 52 | ACTION_INSERT_AFTER = 3 53 | ACTION_INSERT_BEFORE = 4 54 | 55 | _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN, 56 | ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE) 57 | 58 | def __init__(self): 59 | self._options = Options() 60 | 61 | def _get_entityResolver(self): 62 | return self.entityResolver 63 | def _set_entityResolver(self, entityResolver): 64 | self.entityResolver = entityResolver 65 | 66 | def _get_errorHandler(self): 67 | return self.errorHandler 68 | def _set_errorHandler(self, errorHandler): 69 | self.errorHandler = errorHandler 70 | 71 | def _get_filter(self): 72 | return self.filter 73 | def _set_filter(self, filter): 74 | self.filter = filter 75 | 76 | def setFeature(self, name, state): 77 | if self.supportsFeature(name): 78 | state = state and 1 or 0 79 | try: 80 | settings = self._settings[(_name_xform(name), state)] 81 | except KeyError: 82 | raise xml.dom.NotSupportedErr( 83 | "unsupported feature: %r" % (name,)) from None 84 | else: 85 | for name, value in settings: 86 | setattr(self._options, name, value) 87 | else: 88 | raise xml.dom.NotFoundErr("unknown feature: " + repr(name)) 89 | 90 | def supportsFeature(self, name): 91 | return hasattr(self._options, _name_xform(name)) 92 | 93 | def canSetFeature(self, name, state): 94 | key = (_name_xform(name), state and 1 or 0) 95 | return key in self._settings 96 | 97 | # This dictionary maps from (feature,value) to a list of 98 | # (option,value) pairs that should be set on the Options object. 99 | # If a (feature,value) setting is not in this dictionary, it is 100 | # not supported by the DOMBuilder. 
101 | # 102 | _settings = { 103 | ("namespace_declarations", 0): [ 104 | ("namespace_declarations", 0)], 105 | ("namespace_declarations", 1): [ 106 | ("namespace_declarations", 1)], 107 | ("validation", 0): [ 108 | ("validation", 0)], 109 | ("external_general_entities", 0): [ 110 | ("external_general_entities", 0)], 111 | ("external_general_entities", 1): [ 112 | ("external_general_entities", 1)], 113 | ("external_parameter_entities", 0): [ 114 | ("external_parameter_entities", 0)], 115 | ("external_parameter_entities", 1): [ 116 | ("external_parameter_entities", 1)], 117 | ("validate_if_schema", 0): [ 118 | ("validate_if_schema", 0)], 119 | ("create_entity_ref_nodes", 0): [ 120 | ("create_entity_ref_nodes", 0)], 121 | ("create_entity_ref_nodes", 1): [ 122 | ("create_entity_ref_nodes", 1)], 123 | ("entities", 0): [ 124 | ("create_entity_ref_nodes", 0), 125 | ("entities", 0)], 126 | ("entities", 1): [ 127 | ("entities", 1)], 128 | ("whitespace_in_element_content", 0): [ 129 | ("whitespace_in_element_content", 0)], 130 | ("whitespace_in_element_content", 1): [ 131 | ("whitespace_in_element_content", 1)], 132 | ("cdata_sections", 0): [ 133 | ("cdata_sections", 0)], 134 | ("cdata_sections", 1): [ 135 | ("cdata_sections", 1)], 136 | ("comments", 0): [ 137 | ("comments", 0)], 138 | ("comments", 1): [ 139 | ("comments", 1)], 140 | ("charset_overrides_xml_encoding", 0): [ 141 | ("charset_overrides_xml_encoding", 0)], 142 | ("charset_overrides_xml_encoding", 1): [ 143 | ("charset_overrides_xml_encoding", 1)], 144 | ("infoset", 0): [], 145 | ("infoset", 1): [ 146 | ("namespace_declarations", 0), 147 | ("validate_if_schema", 0), 148 | ("create_entity_ref_nodes", 0), 149 | ("entities", 0), 150 | ("cdata_sections", 0), 151 | ("datatype_normalization", 1), 152 | ("whitespace_in_element_content", 1), 153 | ("comments", 1), 154 | ("charset_overrides_xml_encoding", 1)], 155 | ("supported_mediatypes_only", 0): [ 156 | ("supported_mediatypes_only", 0)], 157 | ("namespaces", 0): [ 158 | ("namespaces", 0)], 159 | ("namespaces", 1): [ 160 | ("namespaces", 1)], 161 | } 162 | 163 | def getFeature(self, name): 164 | xname = _name_xform(name) 165 | try: 166 | return getattr(self._options, xname) 167 | except AttributeError: 168 | if name == "infoset": 169 | options = self._options 170 | return (options.datatype_normalization 171 | and options.whitespace_in_element_content 172 | and options.comments 173 | and options.charset_overrides_xml_encoding 174 | and not (options.namespace_declarations 175 | or options.validate_if_schema 176 | or options.create_entity_ref_nodes 177 | or options.entities 178 | or options.cdata_sections)) 179 | raise xml.dom.NotFoundErr("feature %s not known" % repr(name)) 180 | 181 | def parseURI(self, uri): 182 | if self.entityResolver: 183 | input = self.entityResolver.resolveEntity(None, uri) 184 | else: 185 | input = DOMEntityResolver().resolveEntity(None, uri) 186 | return self.parse(input) 187 | 188 | def parse(self, input): 189 | options = copy.copy(self._options) 190 | options.filter = self.filter 191 | options.errorHandler = self.errorHandler 192 | fp = input.byteStream 193 | if fp is None and options.systemId: 194 | import urllib.request 195 | fp = urllib.request.urlopen(input.systemId) 196 | return self._parse_bytestream(fp, options) 197 | 198 | def parseWithContext(self, input, cnode, action): 199 | if action not in self._legal_actions: 200 | raise ValueError("not a legal action") 201 | raise NotImplementedError("Haven't written this yet...") 202 | 203 | def _parse_bytestream(self, 
stream, options): 204 | import xml.dom.expatbuilder 205 | builder = xml.dom.expatbuilder.makeBuilder(options) 206 | return builder.parseFile(stream) 207 | 208 | 209 | def _name_xform(name): 210 | return name.lower().replace('-', '_') 211 | 212 | 213 | class DOMEntityResolver(object): 214 | __slots__ = '_opener', 215 | 216 | def resolveEntity(self, publicId, systemId): 217 | assert systemId is not None 218 | source = DOMInputSource() 219 | source.publicId = publicId 220 | source.systemId = systemId 221 | source.byteStream = self._get_opener().open(systemId) 222 | 223 | # determine the encoding if the transport provided it 224 | source.encoding = self._guess_media_encoding(source) 225 | 226 | # determine the base URI is we can 227 | import posixpath, urllib.parse 228 | parts = urllib.parse.urlparse(systemId) 229 | scheme, netloc, path, params, query, fragment = parts 230 | # XXX should we check the scheme here as well? 231 | if path and not path.endswith("/"): 232 | path = posixpath.dirname(path) + "/" 233 | parts = scheme, netloc, path, params, query, fragment 234 | source.baseURI = urllib.parse.urlunparse(parts) 235 | 236 | return source 237 | 238 | def _get_opener(self): 239 | try: 240 | return self._opener 241 | except AttributeError: 242 | self._opener = self._create_opener() 243 | return self._opener 244 | 245 | def _create_opener(self): 246 | import urllib.request 247 | return urllib.request.build_opener() 248 | 249 | def _guess_media_encoding(self, source): 250 | info = source.byteStream.info() 251 | if "Content-Type" in info: 252 | for param in info.getplist(): 253 | if param.startswith("charset="): 254 | return param.split("=", 1)[1].lower() 255 | 256 | 257 | class DOMInputSource(object): 258 | __slots__ = ('byteStream', 'characterStream', 'stringData', 259 | 'encoding', 'publicId', 'systemId', 'baseURI') 260 | 261 | def __init__(self): 262 | self.byteStream = None 263 | self.characterStream = None 264 | self.stringData = None 265 | self.encoding = None 266 | self.publicId = None 267 | self.systemId = None 268 | self.baseURI = None 269 | 270 | def _get_byteStream(self): 271 | return self.byteStream 272 | def _set_byteStream(self, byteStream): 273 | self.byteStream = byteStream 274 | 275 | def _get_characterStream(self): 276 | return self.characterStream 277 | def _set_characterStream(self, characterStream): 278 | self.characterStream = characterStream 279 | 280 | def _get_stringData(self): 281 | return self.stringData 282 | def _set_stringData(self, data): 283 | self.stringData = data 284 | 285 | def _get_encoding(self): 286 | return self.encoding 287 | def _set_encoding(self, encoding): 288 | self.encoding = encoding 289 | 290 | def _get_publicId(self): 291 | return self.publicId 292 | def _set_publicId(self, publicId): 293 | self.publicId = publicId 294 | 295 | def _get_systemId(self): 296 | return self.systemId 297 | def _set_systemId(self, systemId): 298 | self.systemId = systemId 299 | 300 | def _get_baseURI(self): 301 | return self.baseURI 302 | def _set_baseURI(self, uri): 303 | self.baseURI = uri 304 | 305 | 306 | class DOMBuilderFilter: 307 | """Element filter which can be used to tailor construction of 308 | a DOM instance. 309 | """ 310 | 311 | # There's really no need for this class; concrete implementations 312 | # should just implement the endElement() and startElement() 313 | # methods as appropriate. Using this makes it easy to only 314 | # implement one of them. 
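# Illustrative subclass (an assumed example, not part of this module):
# reject comment nodes and accept everything else while the DOM is built.
#
#     class CommentStripper(DOMBuilderFilter):
#         def acceptNode(self, node):
#             if node.nodeType == node.COMMENT_NODE:
#                 return self.FILTER_REJECT
#             return self.FILTER_ACCEPT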
315 | 316 | FILTER_ACCEPT = 1 317 | FILTER_REJECT = 2 318 | FILTER_SKIP = 3 319 | FILTER_INTERRUPT = 4 320 | 321 | whatToShow = NodeFilter.SHOW_ALL 322 | 323 | def _get_whatToShow(self): 324 | return self.whatToShow 325 | 326 | def acceptNode(self, element): 327 | return self.FILTER_ACCEPT 328 | 329 | def startContainer(self, element): 330 | return self.FILTER_ACCEPT 331 | 332 | del NodeFilter 333 | 334 | 335 | class DocumentLS: 336 | """Mixin to create documents that conform to the load/save spec.""" 337 | 338 | async_ = False 339 | 340 | def _get_async(self): 341 | return False 342 | 343 | def _set_async(self, flag): 344 | if flag: 345 | raise xml.dom.NotSupportedErr( 346 | "asynchronous document loading is not supported") 347 | 348 | def abort(self): 349 | # What does it mean to "clear" a document? Does the 350 | # documentElement disappear? 351 | raise NotImplementedError( 352 | "haven't figured out what this means yet") 353 | 354 | def load(self, uri): 355 | raise NotImplementedError("haven't written this yet") 356 | 357 | def loadXML(self, source): 358 | raise NotImplementedError("haven't written this yet") 359 | 360 | def saveXML(self, snode): 361 | if snode is None: 362 | snode = self 363 | elif snode.ownerDocument is not self: 364 | raise xml.dom.WrongDocumentErr() 365 | return snode.toxml() 366 | 367 | 368 | class DOMImplementationLS: 369 | MODE_SYNCHRONOUS = 1 370 | MODE_ASYNCHRONOUS = 2 371 | 372 | def createDOMBuilder(self, mode, schemaType): 373 | if schemaType is not None: 374 | raise xml.dom.NotSupportedErr( 375 | "schemaType not yet supported") 376 | if mode == self.MODE_SYNCHRONOUS: 377 | return DOMBuilder() 378 | if mode == self.MODE_ASYNCHRONOUS: 379 | raise xml.dom.NotSupportedErr( 380 | "asynchronous builders are not supported") 381 | raise ValueError("unknown value for mode") 382 | 383 | def createDOMWriter(self): 384 | raise NotImplementedError( 385 | "the writer interface hasn't been written yet!") 386 | 387 | def createDOMInputSource(self): 388 | return DOMInputSource() 389 | -------------------------------------------------------------------------------- /xml/etree/ElementInclude.py: -------------------------------------------------------------------------------- 1 | # 2 | # ElementTree 3 | # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ 4 | # 5 | # limited xinclude support for element trees 6 | # 7 | # history: 8 | # 2003-08-15 fl created 9 | # 2003-11-14 fl fixed default loader 10 | # 11 | # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 12 | # 13 | # fredrik@pythonware.com 14 | # http://www.pythonware.com 15 | # 16 | # -------------------------------------------------------------------- 17 | # The ElementTree toolkit is 18 | # 19 | # Copyright (c) 1999-2008 by Fredrik Lundh 20 | # 21 | # By obtaining, using, and/or copying this software and/or its 22 | # associated documentation, you agree that you have read, understood, 23 | # and will comply with the following terms and conditions: 24 | # 25 | # Permission to use, copy, modify, and distribute this software and 26 | # its associated documentation for any purpose and without fee is 27 | # hereby granted, provided that the above copyright notice appears in 28 | # all copies, and that both that copyright notice and this permission 29 | # notice appear in supporting documentation, and that the name of 30 | # Secret Labs AB or the author not be used in advertising or publicity 31 | # pertaining to distribution of the software without specific, written 32 | # prior permission. 
33 | # 34 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 35 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 36 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 37 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 38 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 39 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 40 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 41 | # OF THIS SOFTWARE. 42 | # -------------------------------------------------------------------- 43 | 44 | # Licensed to PSF under a Contributor Agreement. 45 | # See http://www.python.org/psf/license for licensing details. 46 | 47 | ## 48 | # Limited XInclude support for the ElementTree package. 49 | ## 50 | 51 | import copy 52 | from . import ElementTree 53 | 54 | XINCLUDE = "{http://www.w3.org/2001/XInclude}" 55 | 56 | XINCLUDE_INCLUDE = XINCLUDE + "include" 57 | XINCLUDE_FALLBACK = XINCLUDE + "fallback" 58 | 59 | ## 60 | # Fatal include error. 61 | 62 | class FatalIncludeError(SyntaxError): 63 | pass 64 | 65 | ## 66 | # Default loader. This loader reads an included resource from disk. 67 | # 68 | # @param href Resource reference. 69 | # @param parse Parse mode. Either "xml" or "text". 70 | # @param encoding Optional text encoding (UTF-8 by default for "text"). 71 | # @return The expanded resource. If the parse mode is "xml", this 72 | # is an ElementTree instance. If the parse mode is "text", this 73 | # is a Unicode string. If the loader fails, it can return None 74 | # or raise an OSError exception. 75 | # @throws OSError If the loader fails to load the resource. 76 | 77 | def default_loader(href, parse, encoding=None): 78 | if parse == "xml": 79 | with open(href, 'rb') as file: 80 | data = ElementTree.parse(file).getroot() 81 | else: 82 | if not encoding: 83 | encoding = 'UTF-8' 84 | with open(href, 'r', encoding=encoding) as file: 85 | data = file.read() 86 | return data 87 | 88 | ## 89 | # Expand XInclude directives. 90 | # 91 | # @param elem Root element. 92 | # @param loader Optional resource loader. If omitted, it defaults 93 | # to {@link default_loader}. If given, it should be a callable 94 | # that implements the same interface as default_loader. 95 | # @throws FatalIncludeError If the function fails to include a given 96 | # resource, or if the tree contains malformed XInclude elements. 97 | # @throws OSError If the function fails to load a given resource. 
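#
# A minimal usage sketch (not from the original module), assuming
# hypothetical file names and a hypothetical restrictive loader: parse a
# document, expand its xi:include directives in place, and write the result.
#
#     from xml.etree import ElementTree, ElementInclude
#
#     tree = ElementTree.parse("main.xml")
#
#     def xml_only_loader(href, parse, encoding=None):
#         # refuse to include anything that is not an .xml resource
#         if parse == "xml" and not href.endswith(".xml"):
#             raise OSError("refusing to include %r" % href)
#         return ElementInclude.default_loader(href, parse, encoding)
#
#     ElementInclude.include(tree.getroot(), xml_only_loader)
#     tree.write("expanded.xml")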
98 | 99 | def include(elem, loader=None): 100 | if loader is None: 101 | loader = default_loader 102 | # look for xinclude elements 103 | i = 0 104 | while i < len(elem): 105 | e = elem[i] 106 | if e.tag == XINCLUDE_INCLUDE: 107 | # process xinclude directive 108 | href = e.get("href") 109 | parse = e.get("parse", "xml") 110 | if parse == "xml": 111 | node = loader(href, parse) 112 | if node is None: 113 | raise FatalIncludeError( 114 | "cannot load %r as %r" % (href, parse) 115 | ) 116 | node = copy.copy(node) 117 | if e.tail: 118 | node.tail = (node.tail or "") + e.tail 119 | elem[i] = node 120 | elif parse == "text": 121 | text = loader(href, parse, e.get("encoding")) 122 | if text is None: 123 | raise FatalIncludeError( 124 | "cannot load %r as %r" % (href, parse) 125 | ) 126 | if i: 127 | node = elem[i-1] 128 | node.tail = (node.tail or "") + text + (e.tail or "") 129 | else: 130 | elem.text = (elem.text or "") + text + (e.tail or "") 131 | del elem[i] 132 | continue 133 | else: 134 | raise FatalIncludeError( 135 | "unknown parse type in xi:include tag (%r)" % parse 136 | ) 137 | elif e.tag == XINCLUDE_FALLBACK: 138 | raise FatalIncludeError( 139 | "xi:fallback tag must be child of xi:include (%r)" % e.tag 140 | ) 141 | else: 142 | include(e, loader) 143 | i = i + 1 144 | -------------------------------------------------------------------------------- /xml/etree/ElementPath.py: -------------------------------------------------------------------------------- 1 | # 2 | # ElementTree 3 | # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ 4 | # 5 | # limited xpath support for element trees 6 | # 7 | # history: 8 | # 2003-05-23 fl created 9 | # 2003-05-28 fl added support for // etc 10 | # 2003-08-27 fl fixed parsing of periods in element names 11 | # 2007-09-10 fl new selection engine 12 | # 2007-09-12 fl fixed parent selector 13 | # 2007-09-13 fl added iterfind; changed findall to return a list 14 | # 2007-11-30 fl added namespaces support 15 | # 2009-10-30 fl added child element value filter 16 | # 17 | # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 18 | # 19 | # fredrik@pythonware.com 20 | # http://www.pythonware.com 21 | # 22 | # -------------------------------------------------------------------- 23 | # The ElementTree toolkit is 24 | # 25 | # Copyright (c) 1999-2009 by Fredrik Lundh 26 | # 27 | # By obtaining, using, and/or copying this software and/or its 28 | # associated documentation, you agree that you have read, understood, 29 | # and will comply with the following terms and conditions: 30 | # 31 | # Permission to use, copy, modify, and distribute this software and 32 | # its associated documentation for any purpose and without fee is 33 | # hereby granted, provided that the above copyright notice appears in 34 | # all copies, and that both that copyright notice and this permission 35 | # notice appear in supporting documentation, and that the name of 36 | # Secret Labs AB or the author not be used in advertising or publicity 37 | # pertaining to distribution of the software without specific, written 38 | # prior permission. 39 | # 40 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 41 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 42 | # ABILITY AND FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 43 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 44 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 45 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 46 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 47 | # OF THIS SOFTWARE. 48 | # -------------------------------------------------------------------- 49 | 50 | # Licensed to PSF under a Contributor Agreement. 51 | # See http://www.python.org/psf/license for licensing details. 52 | 53 | ## 54 | # Implementation module for XPath support. There's usually no reason 55 | # to import this module directly; the ElementTree does this for 56 | # you, if needed. 57 | ## 58 | 59 | import re 60 | 61 | xpath_tokenizer_re = re.compile( 62 | r"(" 63 | r"'[^']*'|\"[^\"]*\"|" 64 | r"::|" 65 | r"//?|" 66 | r"\.\.|" 67 | r"\(\)|" 68 | r"[/.*:\[\]\(\)@=])|" 69 | r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" 70 | r"\s+" 71 | ) 72 | 73 | def xpath_tokenizer(pattern, namespaces=None): 74 | default_namespace = namespaces.get('') if namespaces else None 75 | parsing_attribute = False 76 | for token in xpath_tokenizer_re.findall(pattern): 77 | ttype, tag = token 78 | if tag and tag[0] != "{": 79 | if ":" in tag: 80 | prefix, uri = tag.split(":", 1) 81 | try: 82 | if not namespaces: 83 | raise KeyError 84 | yield ttype, "{%s}%s" % (namespaces[prefix], uri) 85 | except KeyError: 86 | raise SyntaxError("prefix %r not found in prefix map" % prefix) from None 87 | elif default_namespace and not parsing_attribute: 88 | yield ttype, "{%s}%s" % (default_namespace, tag) 89 | else: 90 | yield token 91 | parsing_attribute = False 92 | else: 93 | yield token 94 | parsing_attribute = ttype == '@' 95 | 96 | 97 | def get_parent_map(context): 98 | parent_map = context.parent_map 99 | if parent_map is None: 100 | context.parent_map = parent_map = {} 101 | for p in context.root.iter(): 102 | for e in p: 103 | parent_map[e] = p 104 | return parent_map 105 | 106 | 107 | def _is_wildcard_tag(tag): 108 | return tag[:3] == '{*}' or tag[-2:] == '}*' 109 | 110 | 111 | def _prepare_tag(tag): 112 | _isinstance, _str = isinstance, str 113 | if tag == '{*}*': 114 | # Same as '*', but no comments or processing instructions. 115 | # It can be a surprise that '*' includes those, but there is no 116 | # justification for '{*}*' doing the same. 117 | def select(context, result): 118 | for elem in result: 119 | if _isinstance(elem.tag, _str): 120 | yield elem 121 | elif tag == '{}*': 122 | # Any tag that is not in a namespace. 123 | def select(context, result): 124 | for elem in result: 125 | el_tag = elem.tag 126 | if _isinstance(el_tag, _str) and el_tag[0] != '{': 127 | yield elem 128 | elif tag[:3] == '{*}': 129 | # The tag in any (or no) namespace. 130 | suffix = tag[2:] # '}name' 131 | no_ns = slice(-len(suffix), None) 132 | tag = tag[3:] 133 | def select(context, result): 134 | for elem in result: 135 | el_tag = elem.tag 136 | if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix: 137 | yield elem 138 | elif tag[-2:] == '}*': 139 | # Any tag in the given namespace. 
140 | ns = tag[:-1] 141 | ns_only = slice(None, len(ns)) 142 | def select(context, result): 143 | for elem in result: 144 | el_tag = elem.tag 145 | if _isinstance(el_tag, _str) and el_tag[ns_only] == ns: 146 | yield elem 147 | else: 148 | raise RuntimeError(f"internal parser error, got {tag}") 149 | return select 150 | 151 | 152 | def prepare_child(next, token): 153 | tag = token[1] 154 | if _is_wildcard_tag(tag): 155 | select_tag = _prepare_tag(tag) 156 | def select(context, result): 157 | def select_child(result): 158 | for elem in result: 159 | yield from elem 160 | return select_tag(context, select_child(result)) 161 | else: 162 | if tag[:2] == '{}': 163 | tag = tag[2:] # '{}tag' == 'tag' 164 | def select(context, result): 165 | for elem in result: 166 | for e in elem: 167 | if e.tag == tag: 168 | yield e 169 | return select 170 | 171 | def prepare_star(next, token): 172 | def select(context, result): 173 | for elem in result: 174 | yield from elem 175 | return select 176 | 177 | def prepare_self(next, token): 178 | def select(context, result): 179 | yield from result 180 | return select 181 | 182 | def prepare_descendant(next, token): 183 | try: 184 | token = next() 185 | except StopIteration: 186 | return 187 | if token[0] == "*": 188 | tag = "*" 189 | elif not token[0]: 190 | tag = token[1] 191 | else: 192 | raise SyntaxError("invalid descendant") 193 | 194 | if _is_wildcard_tag(tag): 195 | select_tag = _prepare_tag(tag) 196 | def select(context, result): 197 | def select_child(result): 198 | for elem in result: 199 | for e in elem.iter(): 200 | if e is not elem: 201 | yield e 202 | return select_tag(context, select_child(result)) 203 | else: 204 | if tag[:2] == '{}': 205 | tag = tag[2:] # '{}tag' == 'tag' 206 | def select(context, result): 207 | for elem in result: 208 | for e in elem.iter(tag): 209 | if e is not elem: 210 | yield e 211 | return select 212 | 213 | def prepare_parent(next, token): 214 | def select(context, result): 215 | # FIXME: raise error if .. is applied at toplevel? 216 | parent_map = get_parent_map(context) 217 | result_map = {} 218 | for elem in result: 219 | if elem in parent_map: 220 | parent = parent_map[elem] 221 | if parent not in result_map: 222 | result_map[parent] = None 223 | yield parent 224 | return select 225 | 226 | def prepare_predicate(next, token): 227 | # FIXME: replace with real parser!!! 
refs: 228 | # http://effbot.org/zone/simple-iterator-parser.htm 229 | # http://javascript.crockford.com/tdop/tdop.html 230 | signature = [] 231 | predicate = [] 232 | while 1: 233 | try: 234 | token = next() 235 | except StopIteration: 236 | return 237 | if token[0] == "]": 238 | break 239 | if token == ('', ''): 240 | # ignore whitespace 241 | continue 242 | if token[0] and token[0][:1] in "'\"": 243 | token = "'", token[0][1:-1] 244 | signature.append(token[0] or "-") 245 | predicate.append(token[1]) 246 | signature = "".join(signature) 247 | # use signature to determine predicate type 248 | if signature == "@-": 249 | # [@attribute] predicate 250 | key = predicate[1] 251 | def select(context, result): 252 | for elem in result: 253 | if elem.get(key) is not None: 254 | yield elem 255 | return select 256 | if signature == "@-='": 257 | # [@attribute='value'] 258 | key = predicate[1] 259 | value = predicate[-1] 260 | def select(context, result): 261 | for elem in result: 262 | if elem.get(key) == value: 263 | yield elem 264 | return select 265 | if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): 266 | # [tag] 267 | tag = predicate[0] 268 | def select(context, result): 269 | for elem in result: 270 | if elem.find(tag) is not None: 271 | yield elem 272 | return select 273 | if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])): 274 | # [.='value'] or [tag='value'] 275 | tag = predicate[0] 276 | value = predicate[-1] 277 | if tag: 278 | def select(context, result): 279 | for elem in result: 280 | for e in elem.findall(tag): 281 | if "".join(e.itertext()) == value: 282 | yield elem 283 | break 284 | else: 285 | def select(context, result): 286 | for elem in result: 287 | if "".join(elem.itertext()) == value: 288 | yield elem 289 | return select 290 | if signature == "-" or signature == "-()" or signature == "-()-": 291 | # [index] or [last()] or [last()-index] 292 | if signature == "-": 293 | # [index] 294 | index = int(predicate[0]) - 1 295 | if index < 0: 296 | raise SyntaxError("XPath position >= 1 expected") 297 | else: 298 | if predicate[0] != "last": 299 | raise SyntaxError("unsupported function") 300 | if signature == "-()-": 301 | try: 302 | index = int(predicate[2]) - 1 303 | except ValueError: 304 | raise SyntaxError("unsupported expression") 305 | if index > -2: 306 | raise SyntaxError("XPath offset from last() must be negative") 307 | else: 308 | index = -1 309 | def select(context, result): 310 | parent_map = get_parent_map(context) 311 | for elem in result: 312 | try: 313 | parent = parent_map[elem] 314 | # FIXME: what if the selector is "*" ? 315 | elems = list(parent.findall(elem.tag)) 316 | if elems[index] is elem: 317 | yield elem 318 | except (IndexError, KeyError): 319 | pass 320 | return select 321 | raise SyntaxError("invalid predicate") 322 | 323 | ops = { 324 | "": prepare_child, 325 | "*": prepare_star, 326 | ".": prepare_self, 327 | "..": prepare_parent, 328 | "//": prepare_descendant, 329 | "[": prepare_predicate, 330 | } 331 | 332 | _cache = {} 333 | 334 | class _SelectorContext: 335 | parent_map = None 336 | def __init__(self, root): 337 | self.root = root 338 | 339 | # -------------------------------------------------------------------- 340 | 341 | ## 342 | # Generate all matching objects. 343 | 344 | def iterfind(elem, path, namespaces=None): 345 | # compile selector pattern 346 | if path[-1:] == "/": 347 | path = path + "*" # implicit all (FIXME: keep this?) 
348 | 349 | cache_key = (path,) 350 | if namespaces: 351 | cache_key += tuple(sorted(namespaces.items())) 352 | 353 | try: 354 | selector = _cache[cache_key] 355 | except KeyError: 356 | if len(_cache) > 100: 357 | _cache.clear() 358 | if path[:1] == "/": 359 | raise SyntaxError("cannot use absolute path on element") 360 | next = iter(xpath_tokenizer(path, namespaces)).__next__ 361 | try: 362 | token = next() 363 | except StopIteration: 364 | return 365 | selector = [] 366 | while 1: 367 | try: 368 | selector.append(ops[token[0]](next, token)) 369 | except StopIteration: 370 | raise SyntaxError("invalid path") from None 371 | try: 372 | token = next() 373 | if token[0] == "/": 374 | token = next() 375 | except StopIteration: 376 | break 377 | _cache[cache_key] = selector 378 | # execute selector pattern 379 | result = [elem] 380 | context = _SelectorContext(elem) 381 | for select in selector: 382 | result = select(context, result) 383 | return result 384 | 385 | ## 386 | # Find first matching object. 387 | 388 | def find(elem, path, namespaces=None): 389 | return next(iterfind(elem, path, namespaces), None) 390 | 391 | ## 392 | # Find all matching objects. 393 | 394 | def findall(elem, path, namespaces=None): 395 | return list(iterfind(elem, path, namespaces)) 396 | 397 | ## 398 | # Find text for first matching object. 399 | 400 | def findtext(elem, path, default=None, namespaces=None): 401 | try: 402 | elem = next(iterfind(elem, path, namespaces)) 403 | return elem.text or "" 404 | except StopIteration: 405 | return default 406 | -------------------------------------------------------------------------------- /xml/etree/__init__.py: -------------------------------------------------------------------------------- 1 | # $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $ 2 | # elementtree package 3 | 4 | # -------------------------------------------------------------------- 5 | # The ElementTree toolkit is 6 | # 7 | # Copyright (c) 1999-2008 by Fredrik Lundh 8 | # 9 | # By obtaining, using, and/or copying this software and/or its 10 | # associated documentation, you agree that you have read, understood, 11 | # and will comply with the following terms and conditions: 12 | # 13 | # Permission to use, copy, modify, and distribute this software and 14 | # its associated documentation for any purpose and without fee is 15 | # hereby granted, provided that the above copyright notice appears in 16 | # all copies, and that both that copyright notice and this permission 17 | # notice appear in supporting documentation, and that the name of 18 | # Secret Labs AB or the author not be used in advertising or publicity 19 | # pertaining to distribution of the software without specific, written 20 | # prior permission. 21 | # 22 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 23 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 24 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 25 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 26 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 27 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 28 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 29 | # OF THIS SOFTWARE. 30 | # -------------------------------------------------------------------- 31 | 32 | # Licensed to PSF under a Contributor Agreement. 33 | # See http://www.python.org/psf/license for licensing details. 
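#
# A minimal sketch (not part of the package): the ElementPath module above
# backs Element.find/findall/iterfind and supports a limited XPath subset --
# child and descendant steps, "*", ".", "..", "@attr", and simple
# predicates. The tag and attribute names below are hypothetical.
#
#     from xml.etree import ElementTree
#
#     root = ElementTree.fromstring(
#         "<lexicon>"
#         "<entry id='a'><form>adat</form></entry>"
#         "<entry id='b'><form>loka</form></entry>"
#         "</lexicon>")
#
#     root.findall("entry")                   # direct children
#     root.findall(".//form")                 # any descendant
#     root.find("entry[@id='b']/form").text   # attribute predicate -> 'loka'
#     root.findall("entry[form='adat']")      # child text-value predicate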
34 | -------------------------------------------------------------------------------- /xml/etree/cElementTree.py: -------------------------------------------------------------------------------- 1 | # Deprecated alias for xml.etree.ElementTree 2 | 3 | from xml.etree.ElementTree import * 4 | -------------------------------------------------------------------------------- /xml/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | """Python interfaces to XML parsers. 2 | 3 | This package contains one module: 4 | 5 | expat -- Python wrapper for James Clark's Expat parser, with namespace 6 | support. 7 | 8 | """ 9 | -------------------------------------------------------------------------------- /xml/parsers/expat.py: -------------------------------------------------------------------------------- 1 | """Interface to the Expat non-validating XML parser.""" 2 | import sys 3 | 4 | from pyexpat import * 5 | 6 | # provide pyexpat submodules as xml.parsers.expat submodules 7 | sys.modules['xml.parsers.expat.model'] = model 8 | sys.modules['xml.parsers.expat.errors'] = errors 9 | -------------------------------------------------------------------------------- /xml/sax/__init__.py: -------------------------------------------------------------------------------- 1 | """Simple API for XML (SAX) implementation for Python. 2 | 3 | This module provides an implementation of the SAX 2 interface; 4 | information about the Java version of the interface can be found at 5 | http://www.megginson.com/SAX/. The Python version of the interface is 6 | documented at <...>. 7 | 8 | This package contains the following modules: 9 | 10 | handler -- Base classes and constants which define the SAX 2 API for 11 | the 'client-side' of SAX for Python. 12 | 13 | saxutils -- Implementation of the convenience classes commonly used to 14 | work with SAX. 15 | 16 | xmlreader -- Base classes and constants which define the SAX 2 API for 17 | the parsers used with SAX for Python. 18 | 19 | expatreader -- Driver that allows use of the Expat parser with SAX. 
20 | """ 21 | 22 | from .xmlreader import InputSource 23 | from .handler import ContentHandler, ErrorHandler 24 | from ._exceptions import SAXException, SAXNotRecognizedException, \ 25 | SAXParseException, SAXNotSupportedException, \ 26 | SAXReaderNotAvailable 27 | 28 | 29 | def parse(source, handler, errorHandler=ErrorHandler()): 30 | parser = make_parser() 31 | parser.setContentHandler(handler) 32 | parser.setErrorHandler(errorHandler) 33 | parser.parse(source) 34 | 35 | def parseString(string, handler, errorHandler=ErrorHandler()): 36 | import io 37 | if errorHandler is None: 38 | errorHandler = ErrorHandler() 39 | parser = make_parser() 40 | parser.setContentHandler(handler) 41 | parser.setErrorHandler(errorHandler) 42 | 43 | inpsrc = InputSource() 44 | if isinstance(string, str): 45 | inpsrc.setCharacterStream(io.StringIO(string)) 46 | else: 47 | inpsrc.setByteStream(io.BytesIO(string)) 48 | parser.parse(inpsrc) 49 | 50 | # this is the parser list used by the make_parser function if no 51 | # alternatives are given as parameters to the function 52 | 53 | default_parser_list = ["xml.sax.expatreader"] 54 | 55 | # tell modulefinder that importing sax potentially imports expatreader 56 | _false = 0 57 | if _false: 58 | import xml.sax.expatreader 59 | 60 | import os, sys 61 | if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ: 62 | default_parser_list = os.environ["PY_SAX_PARSER"].split(",") 63 | del os 64 | 65 | _key = "python.xml.sax.parser" 66 | if sys.platform[:4] == "java" and sys.registry.containsKey(_key): 67 | default_parser_list = sys.registry.getProperty(_key).split(",") 68 | 69 | 70 | def make_parser(parser_list=()): 71 | """Creates and returns a SAX parser. 72 | 73 | Creates the first parser it is able to instantiate of the ones 74 | given in the iterable created by chaining parser_list and 75 | default_parser_list. The iterables must contain the names of Python 76 | modules containing both a SAX parser and a create_parser function.""" 77 | 78 | for parser_name in list(parser_list) + default_parser_list: 79 | try: 80 | return _create_parser(parser_name) 81 | except ImportError as e: 82 | import sys 83 | if parser_name in sys.modules: 84 | # The parser module was found, but importing it 85 | # failed unexpectedly, pass this exception through 86 | raise 87 | except SAXReaderNotAvailable: 88 | # The parser module detected that it won't work properly, 89 | # so try the next one 90 | pass 91 | 92 | raise SAXReaderNotAvailable("No parsers found", None) 93 | 94 | # --- Internal utility methods used by make_parser 95 | 96 | if sys.platform[ : 4] == "java": 97 | def _create_parser(parser_name): 98 | from org.python.core import imp 99 | drv_module = imp.importName(parser_name, 0, globals()) 100 | return drv_module.create_parser() 101 | 102 | else: 103 | def _create_parser(parser_name): 104 | drv_module = __import__(parser_name,{},{},['create_parser']) 105 | return drv_module.create_parser() 106 | 107 | del sys 108 | -------------------------------------------------------------------------------- /xml/sax/_exceptions.py: -------------------------------------------------------------------------------- 1 | """Different kinds of SAX Exceptions""" 2 | import sys 3 | if sys.platform[:4] == "java": 4 | from java.lang import Exception 5 | del sys 6 | 7 | # ===== SAXEXCEPTION ===== 8 | 9 | class SAXException(Exception): 10 | """Encapsulate an XML error or warning. 
This class can contain 11 | basic error or warning information from either the XML parser or 12 | the application: you can subclass it to provide additional 13 | functionality, or to add localization. Note that although you will 14 | receive a SAXException as the argument to the handlers in the 15 | ErrorHandler interface, you are not actually required to raise 16 | the exception; instead, you can simply read the information in 17 | it.""" 18 | 19 | def __init__(self, msg, exception=None): 20 | """Creates an exception. The message is required, but the exception 21 | is optional.""" 22 | self._msg = msg 23 | self._exception = exception 24 | Exception.__init__(self, msg) 25 | 26 | def getMessage(self): 27 | "Return a message for this exception." 28 | return self._msg 29 | 30 | def getException(self): 31 | "Return the embedded exception, or None if there was none." 32 | return self._exception 33 | 34 | def __str__(self): 35 | "Create a string representation of the exception." 36 | return self._msg 37 | 38 | def __getitem__(self, ix): 39 | """Avoids weird error messages if someone does exception[ix] by 40 | mistake, since Exception has __getitem__ defined.""" 41 | raise AttributeError("__getitem__") 42 | 43 | 44 | # ===== SAXPARSEEXCEPTION ===== 45 | 46 | class SAXParseException(SAXException): 47 | """Encapsulate an XML parse error or warning. 48 | 49 | This exception will include information for locating the error in 50 | the original XML document. Note that although the application will 51 | receive a SAXParseException as the argument to the handlers in the 52 | ErrorHandler interface, the application is not actually required 53 | to raise the exception; instead, it can simply read the 54 | information in it and take a different action. 55 | 56 | Since this exception is a subclass of SAXException, it inherits 57 | the ability to wrap another exception.""" 58 | 59 | def __init__(self, msg, exception, locator): 60 | "Creates the exception. The exception parameter is allowed to be None." 61 | SAXException.__init__(self, msg, exception) 62 | self._locator = locator 63 | 64 | # We need to cache this stuff at construction time. 65 | # If this exception is raised, the objects through which we must 66 | # traverse to get this information may be deleted by the time 67 | # it gets caught. 68 | self._systemId = self._locator.getSystemId() 69 | self._colnum = self._locator.getColumnNumber() 70 | self._linenum = self._locator.getLineNumber() 71 | 72 | def getColumnNumber(self): 73 | """The column number of the end of the text where the exception 74 | occurred.""" 75 | return self._colnum 76 | 77 | def getLineNumber(self): 78 | "The line number of the end of the text where the exception occurred." 79 | return self._linenum 80 | 81 | def getPublicId(self): 82 | "Get the public identifier of the entity where the exception occurred." 83 | return self._locator.getPublicId() 84 | 85 | def getSystemId(self): 86 | "Get the system identifier of the entity where the exception occurred." 87 | return self._systemId 88 | 89 | def __str__(self): 90 | "Create a string representation of the exception." 91 | sysid = self.getSystemId() 92 | if sysid is None: 93 | sysid = "" 94 | linenum = self.getLineNumber() 95 | if linenum is None: 96 | linenum = "?" 97 | colnum = self.getColumnNumber() 98 | if colnum is None: 99 | colnum = "?" 
100 | return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg) 101 | 102 | 103 | # ===== SAXNOTRECOGNIZEDEXCEPTION ===== 104 | 105 | class SAXNotRecognizedException(SAXException): 106 | """Exception class for an unrecognized identifier. 107 | 108 | An XMLReader will raise this exception when it is confronted with an 109 | unrecognized feature or property. SAX applications and extensions may 110 | use this class for similar purposes.""" 111 | 112 | 113 | # ===== SAXNOTSUPPORTEDEXCEPTION ===== 114 | 115 | class SAXNotSupportedException(SAXException): 116 | """Exception class for an unsupported operation. 117 | 118 | An XMLReader will raise this exception when a service it cannot 119 | perform is requested (specifically setting a state or value). SAX 120 | applications and extensions may use this class for similar 121 | purposes.""" 122 | 123 | # ===== SAXNOTSUPPORTEDEXCEPTION ===== 124 | 125 | class SAXReaderNotAvailable(SAXNotSupportedException): 126 | """Exception class for a missing driver. 127 | 128 | An XMLReader module (driver) should raise this exception when it 129 | is first imported, e.g. when a support module cannot be imported. 130 | It also may be raised during parsing, e.g. if executing an external 131 | program is not permitted.""" 132 | -------------------------------------------------------------------------------- /xml/sax/saxutils.py: -------------------------------------------------------------------------------- 1 | """\ 2 | A library of useful helper classes to the SAX classes, for the 3 | convenience of application and driver writers. 4 | """ 5 | 6 | import os, urllib.parse, urllib.request 7 | import io 8 | import codecs 9 | from . import handler 10 | from . import xmlreader 11 | 12 | def __dict_replace(s, d): 13 | """Replace substrings of a string using a dictionary.""" 14 | for key, value in d.items(): 15 | s = s.replace(key, value) 16 | return s 17 | 18 | def escape(data, entities={}): 19 | """Escape &, <, and > in a string of data. 20 | 21 | You can escape other strings of data by passing a dictionary as 22 | the optional entities parameter. The keys and values must all be 23 | strings; each key will be replaced with its corresponding value. 24 | """ 25 | 26 | # must do ampersand first 27 | data = data.replace("&", "&") 28 | data = data.replace(">", ">") 29 | data = data.replace("<", "<") 30 | if entities: 31 | data = __dict_replace(data, entities) 32 | return data 33 | 34 | def unescape(data, entities={}): 35 | """Unescape &, <, and > in a string of data. 36 | 37 | You can unescape other strings of data by passing a dictionary as 38 | the optional entities parameter. The keys and values must all be 39 | strings; each key will be replaced with its corresponding value. 40 | """ 41 | data = data.replace("<", "<") 42 | data = data.replace(">", ">") 43 | if entities: 44 | data = __dict_replace(data, entities) 45 | # must do ampersand last 46 | return data.replace("&", "&") 47 | 48 | def quoteattr(data, entities={}): 49 | """Escape and quote an attribute value. 50 | 51 | Escape &, <, and > in a string of data, then quote it for use as 52 | an attribute value. The \" character will be escaped as well, if 53 | necessary. 54 | 55 | You can escape other strings of data by passing a dictionary as 56 | the optional entities parameter. The keys and values must all be 57 | strings; each key will be replaced with its corresponding value. 
58 | """ 59 | entities = {**entities, '\n': ' ', '\r': ' ', '\t':' '} 60 | data = escape(data, entities) 61 | if '"' in data: 62 | if "'" in data: 63 | data = '"%s"' % data.replace('"', """) 64 | else: 65 | data = "'%s'" % data 66 | else: 67 | data = '"%s"' % data 68 | return data 69 | 70 | 71 | def _gettextwriter(out, encoding): 72 | if out is None: 73 | import sys 74 | return sys.stdout 75 | 76 | if isinstance(out, io.TextIOBase): 77 | # use a text writer as is 78 | return out 79 | 80 | if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)): 81 | # use a codecs stream writer as is 82 | return out 83 | 84 | # wrap a binary writer with TextIOWrapper 85 | if isinstance(out, io.RawIOBase): 86 | # Keep the original file open when the TextIOWrapper is 87 | # destroyed 88 | class _wrapper: 89 | __class__ = out.__class__ 90 | def __getattr__(self, name): 91 | return getattr(out, name) 92 | buffer = _wrapper() 93 | buffer.close = lambda: None 94 | else: 95 | # This is to handle passed objects that aren't in the 96 | # IOBase hierarchy, but just have a write method 97 | buffer = io.BufferedIOBase() 98 | buffer.writable = lambda: True 99 | buffer.write = out.write 100 | try: 101 | # TextIOWrapper uses this methods to determine 102 | # if BOM (for UTF-16, etc) should be added 103 | buffer.seekable = out.seekable 104 | buffer.tell = out.tell 105 | except AttributeError: 106 | pass 107 | return io.TextIOWrapper(buffer, encoding=encoding, 108 | errors='xmlcharrefreplace', 109 | newline='\n', 110 | write_through=True) 111 | 112 | class XMLGenerator(handler.ContentHandler): 113 | 114 | def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False): 115 | handler.ContentHandler.__init__(self) 116 | out = _gettextwriter(out, encoding) 117 | self._write = out.write 118 | self._flush = out.flush 119 | self._ns_contexts = [{}] # contains uri -> prefix dicts 120 | self._current_context = self._ns_contexts[-1] 121 | self._undeclared_ns_maps = [] 122 | self._encoding = encoding 123 | self._short_empty_elements = short_empty_elements 124 | self._pending_start_element = False 125 | 126 | def _qname(self, name): 127 | """Builds a qualified name from a (ns_url, localname) pair""" 128 | if name[0]: 129 | # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is 130 | # bound by definition to http://www.w3.org/XML/1998/namespace. It 131 | # does not need to be declared and will not usually be found in 132 | # self._current_context. 
133 |             if 'http://www.w3.org/XML/1998/namespace' == name[0]:
134 |                 return 'xml:' + name[1]
135 |             # The name is in a non-empty namespace
136 |             prefix = self._current_context[name[0]]
137 |             if prefix:
138 |                 # If it is not the default namespace, prepend the prefix
139 |                 return prefix + ":" + name[1]
140 |         # Return the unqualified name
141 |         return name[1]
142 | 
143 |     def _finish_pending_start_element(self, endElement=False):
144 |         if self._pending_start_element:
145 |             self._write('>')
146 |             self._pending_start_element = False
147 | 
148 |     # ContentHandler methods
149 | 
150 |     def startDocument(self):
151 |         self._write('<?xml version="1.0" encoding="%s"?>\n' %
152 |                     self._encoding)
153 | 
154 |     def endDocument(self):
155 |         self._flush()
156 | 
157 |     def startPrefixMapping(self, prefix, uri):
158 |         self._ns_contexts.append(self._current_context.copy())
159 |         self._current_context[uri] = prefix
160 |         self._undeclared_ns_maps.append((prefix, uri))
161 | 
162 |     def endPrefixMapping(self, prefix):
163 |         self._current_context = self._ns_contexts[-1]
164 |         del self._ns_contexts[-1]
165 | 
166 |     def startElement(self, name, attrs):
167 |         self._finish_pending_start_element()
168 |         self._write('<' + name)
169 |         for (name, value) in attrs.items():
170 |             self._write(' %s=%s' % (name, quoteattr(value)))
171 |         if self._short_empty_elements:
172 |             self._pending_start_element = True
173 |         else:
174 |             self._write(">")
175 | 
176 |     def endElement(self, name):
177 |         if self._pending_start_element:
178 |             self._write('/>')
179 |             self._pending_start_element = False
180 |         else:
181 |             self._write('</%s>' % name)
182 | 
183 |     def startElementNS(self, name, qname, attrs):
184 |         self._finish_pending_start_element()
185 |         self._write('<' + self._qname(name))
186 | 
187 |         for prefix, uri in self._undeclared_ns_maps:
188 |             if prefix:
189 |                 self._write(' xmlns:%s="%s"' % (prefix, uri))
190 |             else:
191 |                 self._write(' xmlns="%s"' % uri)
192 |         self._undeclared_ns_maps = []
193 | 
194 |         for (name, value) in attrs.items():
195 |             self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
196 |         if self._short_empty_elements:
197 |             self._pending_start_element = True
198 |         else:
199 |             self._write(">")
200 | 
201 |     def endElementNS(self, name, qname):
202 |         if self._pending_start_element:
203 |             self._write('/>')
204 |             self._pending_start_element = False
205 |         else:
206 |             self._write('</%s>' % self._qname(name))
207 | 
208 |     def characters(self, content):
209 |         if content:
210 |             self._finish_pending_start_element()
211 |             if not isinstance(content, str):
212 |                 content = str(content, self._encoding)
213 |             self._write(escape(content))
214 | 
215 |     def ignorableWhitespace(self, content):
216 |         if content:
217 |             self._finish_pending_start_element()
218 |             if not isinstance(content, str):
219 |                 content = str(content, self._encoding)
220 |             self._write(content)
221 | 
222 |     def processingInstruction(self, target, data):
223 |         self._finish_pending_start_element()
224 |         self._write('<?%s %s?>' % (target, data))
225 | 
226 | 
227 | class XMLFilterBase(xmlreader.XMLReader):
228 |     """This class is designed to sit between an XMLReader and the
229 |     client application's event handlers.
By default, it does nothing 230 | but pass requests up to the reader and events on to the handlers 231 | unmodified, but subclasses can override specific methods to modify 232 | the event stream or the configuration requests as they pass 233 | through.""" 234 | 235 | def __init__(self, parent = None): 236 | xmlreader.XMLReader.__init__(self) 237 | self._parent = parent 238 | 239 | # ErrorHandler methods 240 | 241 | def error(self, exception): 242 | self._err_handler.error(exception) 243 | 244 | def fatalError(self, exception): 245 | self._err_handler.fatalError(exception) 246 | 247 | def warning(self, exception): 248 | self._err_handler.warning(exception) 249 | 250 | # ContentHandler methods 251 | 252 | def setDocumentLocator(self, locator): 253 | self._cont_handler.setDocumentLocator(locator) 254 | 255 | def startDocument(self): 256 | self._cont_handler.startDocument() 257 | 258 | def endDocument(self): 259 | self._cont_handler.endDocument() 260 | 261 | def startPrefixMapping(self, prefix, uri): 262 | self._cont_handler.startPrefixMapping(prefix, uri) 263 | 264 | def endPrefixMapping(self, prefix): 265 | self._cont_handler.endPrefixMapping(prefix) 266 | 267 | def startElement(self, name, attrs): 268 | self._cont_handler.startElement(name, attrs) 269 | 270 | def endElement(self, name): 271 | self._cont_handler.endElement(name) 272 | 273 | def startElementNS(self, name, qname, attrs): 274 | self._cont_handler.startElementNS(name, qname, attrs) 275 | 276 | def endElementNS(self, name, qname): 277 | self._cont_handler.endElementNS(name, qname) 278 | 279 | def characters(self, content): 280 | self._cont_handler.characters(content) 281 | 282 | def ignorableWhitespace(self, chars): 283 | self._cont_handler.ignorableWhitespace(chars) 284 | 285 | def processingInstruction(self, target, data): 286 | self._cont_handler.processingInstruction(target, data) 287 | 288 | def skippedEntity(self, name): 289 | self._cont_handler.skippedEntity(name) 290 | 291 | # DTDHandler methods 292 | 293 | def notationDecl(self, name, publicId, systemId): 294 | self._dtd_handler.notationDecl(name, publicId, systemId) 295 | 296 | def unparsedEntityDecl(self, name, publicId, systemId, ndata): 297 | self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata) 298 | 299 | # EntityResolver methods 300 | 301 | def resolveEntity(self, publicId, systemId): 302 | return self._ent_handler.resolveEntity(publicId, systemId) 303 | 304 | # XMLReader methods 305 | 306 | def parse(self, source): 307 | self._parent.setContentHandler(self) 308 | self._parent.setErrorHandler(self) 309 | self._parent.setEntityResolver(self) 310 | self._parent.setDTDHandler(self) 311 | self._parent.parse(source) 312 | 313 | def setLocale(self, locale): 314 | self._parent.setLocale(locale) 315 | 316 | def getFeature(self, name): 317 | return self._parent.getFeature(name) 318 | 319 | def setFeature(self, name, state): 320 | self._parent.setFeature(name, state) 321 | 322 | def getProperty(self, name): 323 | return self._parent.getProperty(name) 324 | 325 | def setProperty(self, name, value): 326 | self._parent.setProperty(name, value) 327 | 328 | # XMLFilter methods 329 | 330 | def getParent(self): 331 | return self._parent 332 | 333 | def setParent(self, parent): 334 | self._parent = parent 335 | 336 | # --- Utility functions 337 | 338 | def prepare_input_source(source, base=""): 339 | """This function takes an InputSource and an optional base URL and 340 | returns a fully resolved InputSource object ready for reading.""" 341 | 342 | if 
isinstance(source, os.PathLike): 343 | source = os.fspath(source) 344 | if isinstance(source, str): 345 | source = xmlreader.InputSource(source) 346 | elif hasattr(source, "read"): 347 | f = source 348 | source = xmlreader.InputSource() 349 | if isinstance(f.read(0), str): 350 | source.setCharacterStream(f) 351 | else: 352 | source.setByteStream(f) 353 | if hasattr(f, "name") and isinstance(f.name, str): 354 | source.setSystemId(f.name) 355 | 356 | if source.getCharacterStream() is None and source.getByteStream() is None: 357 | sysid = source.getSystemId() 358 | basehead = os.path.dirname(os.path.normpath(base)) 359 | sysidfilename = os.path.join(basehead, sysid) 360 | if os.path.isfile(sysidfilename): 361 | source.setSystemId(sysidfilename) 362 | f = open(sysidfilename, "rb") 363 | else: 364 | source.setSystemId(urllib.parse.urljoin(base, sysid)) 365 | f = urllib.request.urlopen(source.getSystemId()) 366 | 367 | source.setByteStream(f) 368 | 369 | return source 370 | -------------------------------------------------------------------------------- /xml/sax/xmlreader.py: -------------------------------------------------------------------------------- 1 | """An XML Reader is the SAX 2 name for an XML parser. XML Parsers 2 | should be based on this code. """ 3 | 4 | from . import handler 5 | 6 | from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException 7 | 8 | 9 | # ===== XMLREADER ===== 10 | 11 | class XMLReader: 12 | """Interface for reading an XML document using callbacks. 13 | 14 | XMLReader is the interface that an XML parser's SAX2 driver must 15 | implement. This interface allows an application to set and query 16 | features and properties in the parser, to register event handlers 17 | for document processing, and to initiate a document parse. 18 | 19 | All SAX interfaces are assumed to be synchronous: the parse 20 | methods must not return until parsing is complete, and readers 21 | must wait for an event-handler callback to return before reporting 22 | the next event.""" 23 | 24 | def __init__(self): 25 | self._cont_handler = handler.ContentHandler() 26 | self._dtd_handler = handler.DTDHandler() 27 | self._ent_handler = handler.EntityResolver() 28 | self._err_handler = handler.ErrorHandler() 29 | 30 | def parse(self, source): 31 | "Parse an XML document from a system identifier or an InputSource." 32 | raise NotImplementedError("This method must be implemented!") 33 | 34 | def getContentHandler(self): 35 | "Returns the current ContentHandler." 36 | return self._cont_handler 37 | 38 | def setContentHandler(self, handler): 39 | "Registers a new object to receive document content events." 40 | self._cont_handler = handler 41 | 42 | def getDTDHandler(self): 43 | "Returns the current DTD handler." 44 | return self._dtd_handler 45 | 46 | def setDTDHandler(self, handler): 47 | "Register an object to receive basic DTD-related events." 48 | self._dtd_handler = handler 49 | 50 | def getEntityResolver(self): 51 | "Returns the current EntityResolver." 52 | return self._ent_handler 53 | 54 | def setEntityResolver(self, resolver): 55 | "Register an object to resolve external entities." 56 | self._ent_handler = resolver 57 | 58 | def getErrorHandler(self): 59 | "Returns the current ErrorHandler." 60 | return self._err_handler 61 | 62 | def setErrorHandler(self, handler): 63 | "Register an object to receive error-message events." 64 | self._err_handler = handler 65 | 66 | def setLocale(self, locale): 67 | """Allow an application to set the locale for errors and warnings. 
68 | 69 | SAX parsers are not required to provide localization for errors 70 | and warnings; if they cannot support the requested locale, 71 | however, they must raise a SAX exception. Applications may 72 | request a locale change in the middle of a parse.""" 73 | raise SAXNotSupportedException("Locale support not implemented") 74 | 75 | def getFeature(self, name): 76 | "Looks up and returns the state of a SAX2 feature." 77 | raise SAXNotRecognizedException("Feature '%s' not recognized" % name) 78 | 79 | def setFeature(self, name, state): 80 | "Sets the state of a SAX2 feature." 81 | raise SAXNotRecognizedException("Feature '%s' not recognized" % name) 82 | 83 | def getProperty(self, name): 84 | "Looks up and returns the value of a SAX2 property." 85 | raise SAXNotRecognizedException("Property '%s' not recognized" % name) 86 | 87 | def setProperty(self, name, value): 88 | "Sets the value of a SAX2 property." 89 | raise SAXNotRecognizedException("Property '%s' not recognized" % name) 90 | 91 | class IncrementalParser(XMLReader): 92 | """This interface adds three extra methods to the XMLReader 93 | interface that allow XML parsers to support incremental 94 | parsing. Support for this interface is optional, since not all 95 | underlying XML parsers support this functionality. 96 | 97 | When the parser is instantiated it is ready to begin accepting 98 | data from the feed method immediately. After parsing has been 99 | finished with a call to close the reset method must be called to 100 | make the parser ready to accept new data, either from feed or 101 | using the parse method. 102 | 103 | Note that these methods must _not_ be called during parsing, that 104 | is, after parse has been called and before it returns. 105 | 106 | By default, the class also implements the parse method of the XMLReader 107 | interface using the feed, close and reset methods of the 108 | IncrementalParser interface as a convenience to SAX 2.0 driver 109 | writers.""" 110 | 111 | def __init__(self, bufsize=2**16): 112 | self._bufsize = bufsize 113 | XMLReader.__init__(self) 114 | 115 | def parse(self, source): 116 | from . import saxutils 117 | source = saxutils.prepare_input_source(source) 118 | 119 | self.prepareParser(source) 120 | file = source.getCharacterStream() 121 | if file is None: 122 | file = source.getByteStream() 123 | buffer = file.read(self._bufsize) 124 | while buffer: 125 | self.feed(buffer) 126 | buffer = file.read(self._bufsize) 127 | self.close() 128 | 129 | def feed(self, data): 130 | """This method gives the raw XML data in the data parameter to 131 | the parser and makes it parse the data, emitting the 132 | corresponding events. It is allowed for XML constructs to be 133 | split across several calls to feed. 134 | 135 | feed may raise SAXException.""" 136 | raise NotImplementedError("This method must be implemented!") 137 | 138 | def prepareParser(self, source): 139 | """This method is called by the parse implementation to allow 140 | the SAX 2.0 driver to prepare itself for parsing.""" 141 | raise NotImplementedError("prepareParser must be overridden!") 142 | 143 | def close(self): 144 | """This method is called when the entire XML document has been 145 | passed to the parser through the feed method, to notify the 146 | parser that there are no more data. This allows the parser to 147 | do the final checks on the document and empty the internal 148 | data buffer. 149 | 150 | The parser will not be ready to parse another document until 151 | the reset method has been called. 
152 | 153 | close may raise SAXException.""" 154 | raise NotImplementedError("This method must be implemented!") 155 | 156 | def reset(self): 157 | """This method is called after close has been called to reset 158 | the parser so that it is ready to parse new documents. The 159 | results of calling parse or feed after close without calling 160 | reset are undefined.""" 161 | raise NotImplementedError("This method must be implemented!") 162 | 163 | # ===== LOCATOR ===== 164 | 165 | class Locator: 166 | """Interface for associating a SAX event with a document 167 | location. A locator object will return valid results only during 168 | calls to DocumentHandler methods; at any other time, the 169 | results are unpredictable.""" 170 | 171 | def getColumnNumber(self): 172 | "Return the column number where the current event ends." 173 | return -1 174 | 175 | def getLineNumber(self): 176 | "Return the line number where the current event ends." 177 | return -1 178 | 179 | def getPublicId(self): 180 | "Return the public identifier for the current event." 181 | return None 182 | 183 | def getSystemId(self): 184 | "Return the system identifier for the current event." 185 | return None 186 | 187 | # ===== INPUTSOURCE ===== 188 | 189 | class InputSource: 190 | """Encapsulation of the information needed by the XMLReader to 191 | read entities. 192 | 193 | This class may include information about the public identifier, 194 | system identifier, byte stream (possibly with character encoding 195 | information) and/or the character stream of an entity. 196 | 197 | Applications will create objects of this class for use in the 198 | XMLReader.parse method and for returning from 199 | EntityResolver.resolveEntity. 200 | 201 | An InputSource belongs to the application, the XMLReader is not 202 | allowed to modify InputSource objects passed to it from the 203 | application, although it may make copies and modify those.""" 204 | 205 | def __init__(self, system_id = None): 206 | self.__system_id = system_id 207 | self.__public_id = None 208 | self.__encoding = None 209 | self.__bytefile = None 210 | self.__charfile = None 211 | 212 | def setPublicId(self, public_id): 213 | "Sets the public identifier of this InputSource." 214 | self.__public_id = public_id 215 | 216 | def getPublicId(self): 217 | "Returns the public identifier of this InputSource." 218 | return self.__public_id 219 | 220 | def setSystemId(self, system_id): 221 | "Sets the system identifier of this InputSource." 222 | self.__system_id = system_id 223 | 224 | def getSystemId(self): 225 | "Returns the system identifier of this InputSource." 226 | return self.__system_id 227 | 228 | def setEncoding(self, encoding): 229 | """Sets the character encoding of this InputSource. 230 | 231 | The encoding must be a string acceptable for an XML encoding 232 | declaration (see section 4.3.3 of the XML recommendation). 233 | 234 | The encoding attribute of the InputSource is ignored if the 235 | InputSource also contains a character stream.""" 236 | self.__encoding = encoding 237 | 238 | def getEncoding(self): 239 | "Get the character encoding of this InputSource." 240 | return self.__encoding 241 | 242 | def setByteStream(self, bytefile): 243 | """Set the byte stream (a Python file-like object which does 244 | not perform byte-to-character conversion) for this input 245 | source. 246 | 247 | The SAX parser will ignore this if there is also a character 248 | stream specified, but it will use a byte stream in preference 249 | to opening a URI connection itself. 
250 | 251 | If the application knows the character encoding of the byte 252 | stream, it should set it with the setEncoding method.""" 253 | self.__bytefile = bytefile 254 | 255 | def getByteStream(self): 256 | """Get the byte stream for this input source. 257 | 258 | The getEncoding method will return the character encoding for 259 | this byte stream, or None if unknown.""" 260 | return self.__bytefile 261 | 262 | def setCharacterStream(self, charfile): 263 | """Set the character stream for this input source. (The stream 264 | must be a Python 2.0 Unicode-wrapped file-like that performs 265 | conversion to Unicode strings.) 266 | 267 | If there is a character stream specified, the SAX parser will 268 | ignore any byte stream and will not attempt to open a URI 269 | connection to the system identifier.""" 270 | self.__charfile = charfile 271 | 272 | def getCharacterStream(self): 273 | "Get the character stream for this input source." 274 | return self.__charfile 275 | 276 | # ===== ATTRIBUTESIMPL ===== 277 | 278 | class AttributesImpl: 279 | 280 | def __init__(self, attrs): 281 | """Non-NS-aware implementation. 282 | 283 | attrs should be of the form {name : value}.""" 284 | self._attrs = attrs 285 | 286 | def getLength(self): 287 | return len(self._attrs) 288 | 289 | def getType(self, name): 290 | return "CDATA" 291 | 292 | def getValue(self, name): 293 | return self._attrs[name] 294 | 295 | def getValueByQName(self, name): 296 | return self._attrs[name] 297 | 298 | def getNameByQName(self, name): 299 | if name not in self._attrs: 300 | raise KeyError(name) 301 | return name 302 | 303 | def getQNameByName(self, name): 304 | if name not in self._attrs: 305 | raise KeyError(name) 306 | return name 307 | 308 | def getNames(self): 309 | return list(self._attrs.keys()) 310 | 311 | def getQNames(self): 312 | return list(self._attrs.keys()) 313 | 314 | def __len__(self): 315 | return len(self._attrs) 316 | 317 | def __getitem__(self, name): 318 | return self._attrs[name] 319 | 320 | def keys(self): 321 | return list(self._attrs.keys()) 322 | 323 | def __contains__(self, name): 324 | return name in self._attrs 325 | 326 | def get(self, name, alternative=None): 327 | return self._attrs.get(name, alternative) 328 | 329 | def copy(self): 330 | return self.__class__(self._attrs) 331 | 332 | def items(self): 333 | return list(self._attrs.items()) 334 | 335 | def values(self): 336 | return list(self._attrs.values()) 337 | 338 | # ===== ATTRIBUTESNSIMPL ===== 339 | 340 | class AttributesNSImpl(AttributesImpl): 341 | 342 | def __init__(self, attrs, qnames): 343 | """NS-aware implementation. 344 | 345 | attrs should be of the form {(ns_uri, lname): value, ...}. 
346 | qnames of the form {(ns_uri, lname): qname, ...}.""" 347 | self._attrs = attrs 348 | self._qnames = qnames 349 | 350 | def getValueByQName(self, name): 351 | for (nsname, qname) in self._qnames.items(): 352 | if qname == name: 353 | return self._attrs[nsname] 354 | 355 | raise KeyError(name) 356 | 357 | def getNameByQName(self, name): 358 | for (nsname, qname) in self._qnames.items(): 359 | if qname == name: 360 | return nsname 361 | 362 | raise KeyError(name) 363 | 364 | def getQNameByName(self, name): 365 | return self._qnames[name] 366 | 367 | def getQNames(self): 368 | return list(self._qnames.values()) 369 | 370 | def copy(self): 371 | return self.__class__(self._attrs, self._qnames) 372 | 373 | 374 | def _test(): 375 | XMLReader() 376 | IncrementalParser() 377 | Locator() 378 | 379 | if __name__ == "__main__": 380 | _test() 381 | -------------------------------------------------------------------------------- /xpath/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from builtins import str 3 | from builtins import range 4 | from builtins import object 5 | from xpath.exceptions import * 6 | import xpath.exceptions 7 | import xpath.expr 8 | import xpath.parser 9 | import xpath.yappsrt 10 | 11 | __all__ = ['find', 'findnode', 'findvalue', 'XPathContext', 'XPath'] 12 | __all__.extend((x for x in dir(xpath.exceptions) if not x.startswith('_'))) 13 | 14 | def api(f): 15 | """Decorator for functions and methods that are part of the external 16 | module API and that can throw XPathError exceptions. 17 | 18 | The call stack for these exceptions can be very large, and not very 19 | interesting to the user. This decorator rethrows XPathErrors to 20 | trim the stack. 21 | 22 | """ 23 | def api_function(*args, **kwargs): 24 | try: 25 | return f(*args, **kwargs) 26 | except XPathError as e: 27 | raise e 28 | api_function.__name__ = f.__name__ 29 | api_function.__doc__ = f.__doc__ 30 | return api_function 31 | 32 | class XPathContext(object): 33 | def __init__(self, document=None, **kwargs): 34 | self.default_namespace = None 35 | self.namespaces = {} 36 | self.variables = {} 37 | 38 | if document is not None: 39 | if document.nodeType != document.DOCUMENT_NODE: 40 | document = document.ownerDocument 41 | if document.documentElement is not None: 42 | attrs = document.documentElement.attributes 43 | for attr in (attrs.item(i) for i in range(attrs.length)): 44 | if attr.name == 'xmlns': 45 | self.default_namespace = attr.value 46 | elif attr.name.startswith('xmlns:'): 47 | self.namespaces[attr.name[6:]] = attr.value 48 | 49 | self.update(**kwargs) 50 | 51 | def clone(self): 52 | dup = XPathContext() 53 | dup.default_namespace = self.default_namespace 54 | dup.namespaces.update(self.namespaces) 55 | dup.variables.update(self.variables) 56 | return dup 57 | 58 | def update(self, default_namespace=None, namespaces=None, 59 | variables=None, **kwargs): 60 | if default_namespace is not None: 61 | self.default_namespace = default_namespace 62 | if namespaces is not None: 63 | self.namespaces = namespaces 64 | if variables is not None: 65 | self.variables = variables 66 | self.variables.update(kwargs) 67 | 68 | @api 69 | def find(self, expr, node, **kwargs): 70 | return xpath.find(expr, node, context=self, **kwargs) 71 | 72 | @api 73 | def findnode(self, expr, node, **kwargs): 74 | return xpath.findnode(expr, node, context=self, **kwargs) 75 | 76 | @api 77 | def findvalue(self, expr, node, **kwargs): 78 | return xpath.findvalue(expr, node, 
context=self, **kwargs) 79 | 80 | @api 81 | def findvalues(self, expr, node, **kwargs): 82 | return xpath.findvalues(expr, node, context=self, **kwargs) 83 | 84 | class XPath(object): 85 | _max_cache = 100 86 | _cache = {} 87 | 88 | def __init__(self, expr): 89 | """Init docs. 90 | """ 91 | try: 92 | parser = xpath.parser.XPath(xpath.parser.XPathScanner(str(expr))) 93 | self.expr = parser.XPath() 94 | except xpath.yappsrt.SyntaxError as e: 95 | raise XPathParseError(str(expr), e.pos, e.msg) 96 | 97 | @classmethod 98 | def get(cls, s): 99 | if isinstance(s, cls): 100 | return s 101 | try: 102 | return cls._cache[s] 103 | except KeyError: 104 | if len(cls._cache) > cls._max_cache: 105 | cls._cache.clear() 106 | expr = cls(s) 107 | cls._cache[s] = expr 108 | return expr 109 | 110 | @api 111 | def find(self, node, context=None, **kwargs): 112 | if context is None: 113 | context = XPathContext(node, **kwargs) 114 | elif kwargs: 115 | context = context.clone() 116 | context.update(**kwargs) 117 | return self.expr.evaluate(node, 1, 1, context) 118 | 119 | @api 120 | def findnode(self, node, context=None, **kwargs): 121 | result = self.find(node, context, **kwargs) 122 | if not xpath.expr.nodesetp(result): 123 | raise XPathTypeError("expression is not a node-set") 124 | if len(result) == 0: 125 | return None 126 | return result[0] 127 | 128 | @api 129 | def findvalue(self, node, context=None, **kwargs): 130 | result = self.find(node, context, **kwargs) 131 | if xpath.expr.nodesetp(result): 132 | if len(result) == 0: 133 | return None 134 | result = xpath.expr.string(result) 135 | return result 136 | 137 | @api 138 | def findvalues(self, node, context=None, **kwargs): 139 | result = self.find(node, context, **kwargs) 140 | if not xpath.expr.nodesetp(result): 141 | raise XPathTypeError("expression is not a node-set") 142 | return [xpath.expr.string_value(x) for x in result] 143 | 144 | def __repr__(self): 145 | return '%s.%s(%s)' % (self.__class__.__module__, 146 | self.__class__.__name__, 147 | repr(str(self.expr))) 148 | 149 | def __str__(self): 150 | return str(self.expr) 151 | 152 | @api 153 | def find(expr, node, **kwargs): 154 | return XPath.get(expr).find(node, **kwargs) 155 | 156 | @api 157 | def findnode(expr, node, **kwargs): 158 | return XPath.get(expr).findnode(node, **kwargs) 159 | 160 | @api 161 | def findvalue(expr, node, **kwargs): 162 | return XPath.get(expr).findvalue(node, **kwargs) 163 | 164 | @api 165 | def findvalues(expr, node, **kwargs): 166 | return XPath.get(expr).findvalues(node, **kwargs) 167 | -------------------------------------------------------------------------------- /xpath/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class XPathError(Exception): 4 | """Base exception class used for all XPath exceptions.""" 5 | 6 | class XPathNotImplementedError(XPathError): 7 | """Raised when an XPath expression contains a feature of XPath which 8 | has not been implemented. 9 | 10 | """ 11 | 12 | class XPathParseError(XPathError): 13 | """Raised when an XPath expression could not be parsed.""" 14 | 15 | def __init__(self, expr, pos, message): 16 | XPathError.__init__(self) 17 | self.expr = expr 18 | self.pos = pos 19 | self.err = message 20 | 21 | def __str__(self): 22 | return ("Syntax error:\n" + 23 | self.expr.replace("\n", " ") + "\n" + 24 | ("-" * self.pos) + "^") 25 | 26 | class XPathTypeError(XPathError): 27 | """Raised when an XPath expression is found to contain a type error. 
28 | For example, the expression "string()/node()" contains a type error 29 | because the "string()" function does not return a node-set. 30 | 31 | """ 32 | 33 | class XPathUnknownFunctionError(XPathError): 34 | """Raised when an XPath expression contains a function that has no 35 | binding in the expression context. 36 | 37 | """ 38 | 39 | class XPathUnknownPrefixError(XPathError): 40 | """Raised when an XPath expression contains a QName with a namespace 41 | prefix that has no corresponding namespace declaration in the expression 42 | context. 43 | 44 | """ 45 | 46 | class XPathUnknownVariableError(XPathError): 47 | """Raised when an XPath expression contains a variable that has no 48 | binding in the expression context. 49 | 50 | """ 51 | -------------------------------------------------------------------------------- /xpath/parser.g: -------------------------------------------------------------------------------- 1 | import xpath.expr as X 2 | from xpath.yappsrt import * 3 | 4 | %% 5 | 6 | parser XPath: 7 | option: 'no-support-module' 8 | 9 | ignore: r'\s+' 10 | token END: r'$' 11 | 12 | token FORWARD_AXIS_NAME: 13 | r'child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace' 14 | token REVERSE_AXIS_NAME: 15 | r'parent|preceding-sibling|preceding|ancestor-or-self|ancestor' 16 | 17 | # Dire hack here, since yapps2 has only one token of lookahead: NCNAME 18 | # does not match when followed by an open paren, so a name immediately followed by "(" (e.g. "count(") is scanned as FUNCNAME or a node-type keyword rather than as an NCNAME. 19 | token NCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*(?!\()' 20 | token FUNCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*' 21 | 22 | token DQUOTE: r'\"(?:[^\"])*\"' 23 | token SQUOTE: r"\'(?:[^\'])*\'" 24 | token NUMBER: r'((\.[0-9]+)|([0-9]+(\.[0-9]*)?))([eE][\+\-]?[0-9]+)?' 25 | token EQ_COMP: r'\!?\=' 26 | token REL_COMP: r'[\<\>]\=?'
27 | token ADD_COMP: r'[\+\-]' 28 | token MUL_COMP: r'\*|div|mod' 29 | 30 | rule XPath: 31 | Expr END {{ return Expr }} 32 | 33 | rule Expr: 34 | OrExpr {{ return OrExpr }} 35 | 36 | rule OrExpr: 37 | AndExpr {{ Expr = AndExpr }} 38 | ( 39 | r'or' AndExpr 40 | {{ Expr = X.OrExpr('or', Expr, AndExpr) }} 41 | )* {{ return Expr }} 42 | 43 | rule AndExpr: 44 | EqualityExpr {{ Expr = EqualityExpr }} 45 | ( 46 | r'and' EqualityExpr 47 | {{ Expr = X.AndExpr('and', Expr, EqualityExpr) }} 48 | )* {{ return Expr }} 49 | 50 | rule EqualityExpr: 51 | RelationalExpr {{ Expr = RelationalExpr }} 52 | ( 53 | EQ_COMP 54 | RelationalExpr 55 | {{ Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr) }} 56 | )* {{ return Expr }} 57 | 58 | rule RelationalExpr: 59 | AdditiveExpr {{ Expr = AdditiveExpr }} 60 | ( 61 | REL_COMP 62 | AdditiveExpr 63 | {{ Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr) }} 64 | )* {{ return Expr }} 65 | 66 | rule AdditiveExpr: 67 | MultiplicativeExpr {{ Expr = MultiplicativeExpr }} 68 | ( 69 | ADD_COMP 70 | MultiplicativeExpr 71 | {{ Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr) }} 72 | )* {{ return Expr }} 73 | 74 | rule MultiplicativeExpr: 75 | UnionExpr {{ Expr = UnionExpr }} 76 | ( 77 | MUL_COMP 78 | UnionExpr 79 | {{ Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr) }} 80 | )* {{ return Expr }} 81 | 82 | rule UnionExpr: 83 | UnaryExpr {{ Expr = UnaryExpr }} 84 | ( 85 | '\|' UnaryExpr 86 | {{ Expr = X.UnionExpr('|', Expr, UnaryExpr) }} 87 | )* {{ return Expr }} 88 | 89 | rule UnaryExpr: 90 | r'\-' ValueExpr {{ return X.NegationExpr(ValueExpr) }} 91 | | ValueExpr {{ return ValueExpr }} 92 | 93 | rule ValueExpr: 94 | PathExpr {{ return PathExpr }} 95 | 96 | rule PathExpr: 97 | r'\/' {{ path = None }} 98 | [ 99 | RelativePathExpr {{ path = RelativePathExpr }} 100 | ] {{ return X.AbsolutePathExpr(path) }} 101 | | r'\/\/' RelativePathExpr 102 | {{ step = X.AxisStep('descendant-or-self') }} 103 | {{ RelativePathExpr.steps.insert(0, step) }} 104 | {{ return X.AbsolutePathExpr(RelativePathExpr) }} 105 | | RelativePathExpr {{ return RelativePathExpr }} 106 | 107 | rule RelativePathExpr: 108 | StepExpr {{ steps = [StepExpr] }} 109 | ( 110 | ( 111 | r'\/' 112 | | r'\/\/' 113 | {{ steps.append(X.AxisStep('descendant-or-self')) }} 114 | ) 115 | StepExpr {{ steps.append(StepExpr) }} 116 | )* 117 | {{ return X.PathExpr(steps) }} 118 | 119 | rule StepExpr: 120 | AxisStep {{ return AxisStep }} 121 | | FilterExpr {{ return FilterExpr }} 122 | 123 | rule AxisStep: 124 | ( 125 | ForwardStep {{ step = ForwardStep }} 126 | | ReverseStep {{ step = ReverseStep }} 127 | ) {{ expr = X.AxisStep(*step) }} 128 | [ 129 | PredicateList 130 | {{ expr = X.PredicateList(expr, PredicateList, step[0]) }} 131 | ] 132 | {{ return expr }} 133 | 134 | rule ForwardStep: 135 | ForwardAxis NodeTest {{ return [ForwardAxis, NodeTest] }} 136 | | AbbrevForwardStep {{ return AbbrevForwardStep }} 137 | 138 | rule ForwardAxis: 139 | FORWARD_AXIS_NAME r'::' {{ return FORWARD_AXIS_NAME }} 140 | 141 | rule AbbrevForwardStep: 142 | {{ axis = 'child' }} 143 | [ 144 | r'@' {{ axis = 'attribute' }} 145 | ] 146 | NodeTest {{ return [axis, NodeTest] }} 147 | 148 | rule ReverseStep: 149 | ReverseAxis NodeTest {{ return [ReverseAxis, NodeTest] }} 150 | | AbbrevReverseStep {{ return AbbrevReverseStep }} 151 | 152 | rule ReverseAxis: 153 | REVERSE_AXIS_NAME r'::' {{ return REVERSE_AXIS_NAME }} 154 | 155 | rule AbbrevReverseStep: 156 | r'\.\.' 
{{ return ['parent', None] }} 157 | 158 | rule NodeTest: 159 | KindTest {{ return KindTest }} 160 | | NameTest {{ return NameTest }} 161 | 162 | rule NameTest: 163 | # We also support the XPath 2.0 :*. 164 | {{ prefix = None }} 165 | WildcardOrNCName {{ localpart = WildcardOrNCName }} 166 | [ 167 | r':' WildcardOrNCName {{ prefix = localpart }} 168 | {{ localpart = WildcardOrNCName }} 169 | ] 170 | {{ return X.NameTest(prefix, localpart) }} 171 | 172 | rule WildcardOrNCName: 173 | r'\*' {{ return '*' }} 174 | | NCNAME {{ return NCNAME }} 175 | 176 | rule FilterExpr: 177 | PrimaryExpr 178 | [ 179 | PredicateList 180 | {{ PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList) }} 181 | ] {{ return PrimaryExpr }} 182 | 183 | rule PredicateList: 184 | Predicate {{ predicates = [Predicate] }} 185 | ( 186 | Predicate {{ predicates.append(Predicate) }} 187 | )* {{ return predicates }} 188 | 189 | rule Predicate: 190 | r'\[' Expr r'\]' {{ return Expr }} 191 | 192 | rule PrimaryExpr: 193 | Literal {{ return X.LiteralExpr(Literal) }} 194 | | VariableReference {{ return VariableReference }} 195 | | r'\(' Expr r'\)' {{ return Expr }} 196 | | ContextItemExpr {{ return ContextItemExpr }} 197 | | FunctionCall {{ return FunctionCall }} 198 | 199 | rule VariableReference: 200 | r'\$' QName 201 | {{ return X.VariableReference(*QName) }} 202 | 203 | rule ContextItemExpr: 204 | r'\.' {{ return X.AxisStep('self') }} 205 | 206 | rule FunctionCall: 207 | FUNCNAME r'\(' {{ args = [] }} 208 | [ 209 | Expr {{ args.append(Expr) }} 210 | ( 211 | r'\,' Expr {{ args.append(Expr) }} 212 | )* 213 | ] r'\)' {{ return X.Function(FUNCNAME, args) }} 214 | 215 | rule KindTest: 216 | PITest {{ return PITest }} 217 | | CommentTest {{ return CommentTest }} 218 | | TextTest {{ return TextTest }} 219 | | AnyKindTest {{ return AnyKindTest }} 220 | 221 | rule PITest: 222 | r'processing-instruction' {{ name = None }} 223 | r'\(' [ 224 | NCNAME {{ name = NCNAME }} 225 | | StringLiteral {{ name = StringLiteral }} 226 | ] r'\)' {{ return X.PITest(name) }} 227 | 228 | rule CommentTest: 229 | r'comment' r'\(' r'\)' {{ return X.CommentTest() }} 230 | 231 | rule TextTest: 232 | r'text' r'\(' r'\)' {{ return X.TextTest() }} 233 | 234 | rule AnyKindTest: 235 | r'node' r'\(' r'\)' {{ return X.AnyKindTest() }} 236 | 237 | rule Literal: 238 | NumericLiteral {{ return NumericLiteral }} 239 | | StringLiteral {{ return StringLiteral }} 240 | 241 | rule NumericLiteral: 242 | NUMBER {{ return float(NUMBER) }} 243 | 244 | rule StringLiteral: 245 | DQUOTE {{ return DQUOTE[1:-1] }} 246 | | SQUOTE {{ return SQUOTE[1:-1] }} 247 | 248 | rule QName: 249 | NCNAME {{ name = NCNAME }} 250 | [ 251 | r'\:' NCNAME {{ return (name, NCNAME) }} 252 | ] {{ return (None, name) }} 253 | -------------------------------------------------------------------------------- /xpath/readme.txt: -------------------------------------------------------------------------------- 1 | This module was copied unmodified from py-dom-xpath, which I downloaded here: 2 | http://py-dom-xpath.googlecode.com/ 3 | 4 | It has a bug that prevents it from properly handling XML nodes named <text>, which is crucial for LIFT files. 5 | 6 | This should be fixed, but for now a workaround is to make a temp copy that uses a different element name instead, 7 | and run XPaths against that.
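A minimal sketch of that temp-copy workaround (assuming the problem element is <text>, as above; the replacement name "textx" and the helper name are only for illustration and are not part of the addon):

import re

def make_xpath_safe_copy(lift_xml):
    # Rename <text> elements so the name no longer collides with the
    # parser's text() node-type keyword; query the copy instead, e.g.
    # //entry/lexical-unit/form/textx in place of .../text.
    return re.sub(r'<(/?)text([ >])', r'<\1textx\2', lift_xml)

# e.g. safe_xml = make_xpath_safe_copy(open('dictionary.lift').read())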
8 | -------------------------------------------------------------------------------- /xpath/yappsrt.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Yapps 2.0 Runtime 4 | # 5 | # This module is needed to run generated parsers. 6 | 7 | from builtins import object 8 | import re 9 | 10 | class SyntaxError(Exception): 11 | """When we run into an unexpected token, this is the exception to use""" 12 | def __init__(self, pos=-1, msg="Bad Token"): 13 | Exception.__init__(self) 14 | self.pos = pos 15 | self.msg = msg 16 | def __repr__(self): 17 | if self.pos < 0: return "#" 18 | else: return "SyntaxError[@ char %s: %s]" % (repr(self.pos), self.msg) 19 | 20 | class NoMoreTokens(Exception): 21 | """Another exception object, for when we run out of tokens""" 22 | pass 23 | 24 | class Scanner(object): 25 | def __init__(self, patterns, ignore, input): 26 | """Patterns is [(terminal,regex)...] 27 | Ignore is [terminal,...]; 28 | Input is a string""" 29 | self.tokens = [] 30 | self.restrictions = [] 31 | self.input = input 32 | self.pos = 0 33 | self.ignore = ignore 34 | # The stored patterns are a pair (compiled regex,source 35 | # regex). If the patterns variable passed in to the 36 | # constructor is None, we assume that the class already has a 37 | # proper .patterns list constructed 38 | if patterns is not None: 39 | self.patterns = [] 40 | for k, r in patterns: 41 | self.patterns.append( (k, re.compile(r)) ) 42 | 43 | def token(self, i, restrict=0): 44 | """Get the i'th token, and if i is one past the end, then scan 45 | for another token; restrict is a list of tokens that 46 | are allowed, or 0 for any token.""" 47 | if i == len(self.tokens): self.scan(restrict) 48 | if i < len(self.tokens): 49 | # Make sure the restriction is more restricted 50 | if restrict and self.restrictions[i]: 51 | for r in restrict: 52 | if r not in self.restrictions[i]: 53 | raise NotImplementedError("Unimplemented: restriction set changed") 54 | return self.tokens[i] 55 | raise NoMoreTokens() 56 | 57 | def __repr__(self): 58 | """Print the last 10 tokens that have been scanned in""" 59 | output = '' 60 | for t in self.tokens[-10:]: 61 | output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3])) 62 | return output 63 | 64 | def scan(self, restrict): 65 | """Should scan another token and add it to the list, self.tokens, 66 | and add the restriction to self.restrictions""" 67 | # Keep looking for a token, ignoring any in self.ignore 68 | while 1: 69 | # Search the patterns for the longest match, with earlier 70 | # tokens in the list having preference 71 | best_match = -1 72 | best_pat = '(error)' 73 | for p, regexp in self.patterns: 74 | # First check to see if we're ignoring this token 75 | if restrict and p not in restrict and p not in self.ignore: 76 | continue 77 | m = regexp.match(self.input, self.pos) 78 | if m and len(m.group(0)) > best_match: 79 | # We got a match that's better than the previous one 80 | best_pat = p 81 | best_match = len(m.group(0)) 82 | 83 | # If we didn't find anything, raise an error 84 | if best_pat == '(error)' and best_match < 0: 85 | msg = "Bad Token" 86 | if restrict: 87 | msg = "Trying to find one of "+", ".join(restrict) 88 | raise SyntaxError(self.pos, msg) 89 | 90 | # If we found something that isn't to be ignored, return it 91 | if best_pat not in self.ignore: 92 | # Create a token with this data 93 | token = (self.pos, self.pos+best_match, best_pat, 94 | self.input[self.pos:self.pos+best_match]) 95 | self.pos = self.pos + best_match 
96 | # Only add this token if it's not in the list 97 | # (to prevent looping) 98 | if not self.tokens or token != self.tokens[-1]: 99 | self.tokens.append(token) 100 | self.restrictions.append(restrict) 101 | return 102 | else: 103 | # This token should be ignored .. 104 | self.pos = self.pos + best_match 105 | 106 | class Parser(object): 107 | def __init__(self, scanner): 108 | self._scanner = scanner 109 | self._pos = 0 110 | 111 | def _peek(self, *types): 112 | """Returns the token type for lookahead; if there are any args 113 | then the list of args is the set of token types to allow""" 114 | tok = self._scanner.token(self._pos, types) 115 | return tok[2] 116 | 117 | def _scan(self, type): 118 | """Returns the matched text, and moves to the next token""" 119 | tok = self._scanner.token(self._pos, [type]) 120 | if tok[2] != type: 121 | raise SyntaxError(tok[0], 'Trying to find '+type) 122 | self._pos = 1+self._pos 123 | return tok[3] 124 | 125 | 126 | 127 | def print_error(input, err, scanner): 128 | """This is a really dumb long function to print error messages nicely.""" 129 | p = err.pos 130 | # Figure out the line number 131 | line = input[:p].count('\n') 132 | print(err.msg+" on line "+repr(line+1)+":") 133 | # Now try printing part of the line 134 | text = input[max(p-80, 0):p+80] 135 | p = p - max(p-80, 0) 136 | 137 | # Strip to the left 138 | i = text[:p].rfind('\n') 139 | j = text[:p].rfind('\r') 140 | if i < 0 or (0 <= j < i): i = j 141 | if 0 <= i < p: 142 | p = p - i - 1 143 | text = text[i+1:] 144 | 145 | # Strip to the right 146 | i = text.find('\n', p) 147 | j = text.find('\r', p) 148 | if i < 0 or (0 <= j < i): i = j 149 | if i >= 0: 150 | text = text[:i] 151 | 152 | # Now shorten the text 153 | while len(text) > 70 and p > 60: 154 | # Cut off 10 chars 155 | text = "..." + text[10:] 156 | p = p - 7 157 | 158 | # Now print the string, along with an indicator 159 | print('> ',text) 160 | print('> ',' '*p + '^') 161 | print('List of nearby tokens:', scanner) 162 | 163 | def wrap_error_reporter(parser, rule): 164 | try: 165 | return_value = getattr(parser, rule)() 166 | except SyntaxError as s: 167 | input = parser._scanner.input 168 | try: 169 | print_error(input, s, parser._scanner) 170 | except ImportError: 171 | print('Syntax Error',s.msg,'on line',1+input[:s.pos].count('\n')) 172 | except NoMoreTokens: 173 | print('Could not complete parsing; stopped around here:') 174 | print(parser._scanner) 175 | return return_value 176 | --------------------------------------------------------------------------------
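For reference, a short usage sketch of the bundled xpath package dumped above (the sample XML and values are made up; the calls follow the find/findvalue/findvalues functions and the keyword-argument variable handling in xpath/__init__.py):

import xml.dom.minidom
import xpath

# Parse a small document with the bundled minidom implementation.
doc = xml.dom.minidom.parseString(
    '<entry id="e1"><form lang="id">nagali</form></entry>')

# findvalue: string-value of the first matching node, or None if no match.
print(xpath.findvalue('/entry/form', doc))      # 'nagali'

# findvalues: string-value of every node in the resulting node-set.
print(xpath.findvalues('//form/@lang', doc))    # ['id']

# find: raw evaluation; keyword arguments are stored as $variables
# in the XPathContext.
print(xpath.find('//entry[@id = $which]', doc, which='e1'))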