├── .gitignore
├── .project
├── .pydevproject
├── .settings
│   ├── org.eclipse.core.resources.prefs
│   └── org.eclipse.ltk.core.refactoring.prefs
├── 2014goals-features-scope-discussion.txt
├── 20150105-flashgrab-github-question.txt
├── CopyFilesToAddons.bat
├── CopyFilesToAddons.sh
├── ExcludedFilesLinux.txt
├── ExcludedFilesWindows.txt
├── How-to-limit-to-a-subset-of-the-lexicon.md
├── LICENSE
├── README-manual-build.txt
├── README.md
├── User 1
│   └── collection.media
│       └── blank.txt
├── __init__.py
├── anki notes suggestions.txt
├── flashgrab-v0.8.7.zip
├── icons
│   ├── flashgrid-flashgrab-icons.png
│   ├── flashgrid-flashgrab-icons.svg
│   └── readme.txt
├── readme-developers.txt
├── sample-src
│   ├── Indonesian Vocab 2015-01-30 1445 tagged-in-notes.fwbackup
│   ├── Indonesian Vocab 2015-02-04 2149 updated media.fwbackup
│   └── readme-indo-sample.txt
├── syncx_local.py
├── syncxml
│   ├── README.md
│   ├── SyncFromXML.py
│   ├── SyncFromXML_config_default.txt
│   ├── SyncFromXML_help.pdf
│   ├── SyncFromXML_log.txt
│   ├── SyncFromXML_readme.txt
│   ├── __init__.py
│   ├── anki_util.py
│   ├── docsrc
│   │   ├── SyncFromXML_help.odt
│   │   ├── checklog.png
│   │   ├── customtype.png
│   │   ├── customtype_sm.png
│   │   ├── flex_export.png
│   │   ├── menu.png
│   │   ├── report.png
│   │   ├── sync-report.png
│   │   ├── wizard-done.png
│   │   ├── wizard-examples.png
│   │   ├── wizard1.png
│   │   └── wizard2.png
│   ├── file_util.py
│   ├── flex_util.py
│   ├── glob.py
│   ├── local_launch.py
│   ├── logg.py
│   ├── samples
│   │   ├── Lindu-Test.lift
│   │   ├── audio
│   │   │   ├── 634710572710000000nagali.wav
│   │   │   └── 634710573956406250A LexExampleSentence.wav
│   │   ├── indonesian-lift-dictionary.apkg
│   │   ├── lift-dictionary.apkg
│   │   └── pictures
│   │       └── bersih.gif
│   ├── syncxml.py
│   └── xml_util.py
├── tests
│   └── Moma
│       ├── Moma.WeSayConfig
│       ├── Moma.lift
│       ├── audio
│       │   ├── adat-63531587437330.wav
│       │   ├── loka-63531587541460.wav
│       │   ├── loka-63531587545733.wav
│       │   ├── mata-63531587590792.wav
│       │   └── mata-63531587597879.wav
│       └── pictures
│           ├── cultural law.png
│           └── sdd.png
├── this-is-an-eclipse-pydev-project.txt
├── xml
│   ├── __init__.py
│   ├── dom
│   │   ├── NodeFilter.py
│   │   ├── __init__.py
│   │   ├── domreg.py
│   │   ├── expatbuilder.py
│   │   ├── minicompat.py
│   │   ├── minidom.py
│   │   ├── pulldom.py
│   │   └── xmlbuilder.py
│   ├── etree
│   │   ├── ElementInclude.py
│   │   ├── ElementPath.py
│   │   ├── ElementTree.py
│   │   ├── __init__.py
│   │   └── cElementTree.py
│   ├── parsers
│   │   ├── __init__.py
│   │   └── expat.py
│   └── sax
│       ├── __init__.py
│       ├── _exceptions.py
│       ├── expatreader.py
│       ├── handler.py
│       ├── saxutils.py
│       └── xmlreader.py
└── xpath
    ├── __init__.py
    ├── exceptions.py
    ├── expr.py
    ├── parser.g
    ├── parser.py
    ├── readme.txt
    └── yappsrt.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 |
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 |
43 | # Translations
44 | *.mo
45 | *.pot
46 |
47 | # Django stuff:
48 | *.log
49 |
50 | # Sphinx documentation
51 | docs/_build/
52 |
53 | # PyBuilder
54 | target/
55 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 |     <name>flashgrab</name>
4 |     <comment></comment>
5 |     <projects>
6 |     </projects>
7 |     <buildSpec>
8 |         <buildCommand>
9 |             <name>org.python.pydev.PyDevBuilder</name>
10 |             <arguments>
11 |             </arguments>
12 |         </buildCommand>
13 |     </buildSpec>
14 |     <natures>
15 |         <nature>org.python.pydev.pythonNature</nature>
16 |     </natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?><pydev_project>
3 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
4 | <path>/${PROJECT_DIR_NAME}</path>
5 | </pydev_pathproperty>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
8 | </pydev_project>
9 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | #Wed Jan 29 10:56:06 CST 2014
2 | eclipse.preferences.version=1
3 | encoding/=UTF-8
4 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.ltk.core.refactoring.prefs:
--------------------------------------------------------------------------------
1 | #Thu Aug 30 15:34:08 SGT 2012
2 | eclipse.preferences.version=1
3 | org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false
4 |
--------------------------------------------------------------------------------
/2014goals-features-scope-discussion.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/2014goals-features-scope-discussion.txt
--------------------------------------------------------------------------------
/20150105-flashgrab-github-question.txt:
--------------------------------------------------------------------------------
1 | My wiki's connection to my repository appears to be broken.
2 |
3 | I still have these files in the repo but they no longer update the wiki:
4 | Home.md
5 | How-to-limit-to-a-subset-of-the-lexicon.md
6 | README.md
7 |
8 | Likewise, when I added a new wiki page (How-to-run-and-debug-Anki-in-a-Linux-virtual-machine-or-in-Windows) it didn't show up in the repo.
9 |
10 | I'm not sure if this was a recent GitHub change, or whether it's because the project was recently forked to:
11 | https://github.com/sillsdev/flashgrab
12 |
13 | (I've deleted the original repo: https://github.com/pconstrictor/lexml2anki)
14 |
15 | I see that I can clone the wiki as a separate repository--is that a new requirement? If so, I guess I should just delete those three files from the main repo?
16 |
17 | -Jon
18 |
--------------------------------------------------------------------------------
/CopyFilesToAddons.bat:
--------------------------------------------------------------------------------
1 | ECHO OFF
2 | REM A Windows 'build' script. Run this after editing the addon's files. Then restart Anki.
3 | REM Don't make edits over there in addons directly, as VCS won't see them, and an Anki or addon upgrade
4 | REM might destroy them.
5 | ECHO ON
6 |
7 | REM copy all modified (D) files and folders (even E, empty ones), and overwrite (Y) without prompting
8 | REM (Like Anki's own deployment, this does NOT remove any files. The simplest way to do so manually here is to delete the addons folder, then re-run.)
9 |
10 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt xml\*.* %USERPROFILE%\documents\Anki\addons\xml\
11 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt xpath\*.* %USERPROFILE%\documents\Anki\addons\xpath\
12 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt distutils\*.* %USERPROFILE%\documents\Anki\addons\distutils\
13 | xcopy /D /E /Y /exclude:ExcludedFilesWindows.txt syncxml\*.* %USERPROFILE%\documents\Anki\addons\syncxml\
14 | xcopy /D /Y syncx.py %USERPROFILE%\documents\Anki\addons\
15 |
16 | PAUSE
17 |
--------------------------------------------------------------------------------
/CopyFilesToAddons.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # To run this script from the terminal command line: ./CopyFilesToAddons.sh
4 | # A Linux 'build' script. Run this after editing the addon's files. Then restart Anki.
5 | # Don't make edits over there in addons directly, as VCS won't see them, and an Anki or addon upgrade might destroy them.
6 |
7 | # Copy recursively (-r) all modified files and folders, except the excluded ones
8 | # (Like Anki's own deployment, this does NOT remove any files. The simplest way to do so manually here is to delete the addons folder, then re-run.)
9 |
10 | mkdir -p ~/Anki/addons
11 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt xml ~/Anki/addons/
12 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt xpath ~/Anki/addons/
13 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt distutils ~/Anki/addons/
14 | rsync -r -u -v --exclude-from ExcludedFilesLinux.txt syncxml ~/Anki/addons/
15 | cp -r -u -v syncx.py ~/Anki/addons/
16 |
--------------------------------------------------------------------------------
/ExcludedFilesLinux.txt:
--------------------------------------------------------------------------------
1 | - *.pyc
2 | - local_launch.py
3 | - syncx_local.py
4 | - SyncFromXML_config.txt
5 | - SyncFromXML_log.txt
6 | - SyncFromXML_configB*
--------------------------------------------------------------------------------
/ExcludedFilesWindows.txt:
--------------------------------------------------------------------------------
1 | .pyc\
2 | \local_launch.py\
3 | \syncx_local.py\
4 | \SyncFromXML_config.txt\
5 | \SyncFromXML_log.txt\
6 | \SyncFromXML_configB
--------------------------------------------------------------------------------
/How-to-limit-to-a-subset-of-the-lexicon.md:
--------------------------------------------------------------------------------
1 | How to export a subset of entries from FLEx to LIFT for use in Anki,
2 | or to export all and then limit in Anki.
3 |
4 | How to use only a subset of entries from FLEx in Anki
5 |
6 | (A) Type a code that you can filter on (e.g. "DIFC01") into an entry-level field in FLEx. Then, every time you want to sync your data to Anki, you would filter on this and export the filtered lexicon to LIFT. The best approach would be to add a custom list and list field (e.g. "Difficulty") to the entry level and use it to assign difficulty levels to each vocabulary item. You could then export just level 1 to Anki at first, then gradually add and export higher difficulty words over time.
7 |
8 | (a1 - faster sync) A filtered export like this runs faster and keeps all your editing in FLEx. But it requires that you apply this same filter each time you export.
9 |
10 | (a2 - less work for the human) export the whole lexicon every time, including that field. In Anki, enable/disable cards based on the field value. (The next sync will *not* blow away Anki's own info such as this disabling.)
11 |
12 | Here is some help on filtering and suspending in Anki:
13 | http://ankisrs.net/docs/dev/manual.html#searching
14 | http://ankisrs.net/docs/dev/manual.html#editmore
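
For example, for option (a2) above: assuming the difficulty field was mapped into an Anki field actually named Difficulty, a Browser search such as  note:DICT_LIFT Difficulty:2  should select those notes' cards so they can be suspended (via the Edit menu) and unsuspended again later the same way.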
15 |
16 | There's also a new "filtered deck" feature that might provide exactly this functionality, but I have not yet tried it out:
17 | http://ankisrs.net/docs/dev/manual.html#filtered
18 |
19 | ..
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Free and open source, under Anki's license.
2 | Created 2011-2014 by SIL International (coded by Jonathan Coombs)
3 |
--------------------------------------------------------------------------------
/README-manual-build.txt:
--------------------------------------------------------------------------------
1 | To 'build' this addon, first make sure that the version number in syncxml.py is correct. Then, make a zip file containing these folders and this file:
2 | syncxml/
3 | distutils/
4 | xml/
5 | xpath/
6 | syncx.py
7 |
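A minimal sketch of that zipping step in Python (the output filename and the .pyc exclusion are assumptions; adjust as needed):

    # build_zip.py - hypothetical helper, not part of the addon itself
    import os, zipfile

    ITEMS = ["syncxml", "distutils", "xml", "xpath", "syncx.py"]
    with zipfile.ZipFile("flashgrab.zip", "w", zipfile.ZIP_DEFLATED) as z:
        for item in ITEMS:
            if os.path.isfile(item):
                z.write(item)
            else:
                for root, _dirs, files in os.walk(item):
                    for name in files:
                        if not name.endswith(".pyc"):
                            z.write(os.path.join(root, name))
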
8 | Then, log in and upload the zip file to this addon's page:
9 | https://ankiweb.net/shared/info/915685712
10 |
11 | Optionally, first make sure that the files here are up to date:
12 | syncxml/samples
13 |
14 | That is, if you've edited the FLEx project, do this:
15 | - Re-export that (with media) to LIFT
16 | - If necessary, delete the Anki project, and Note Types DICT_LIFT/DICT_LIFT_EX, and reload this afresh:
17 | syncxml/samples/lift-dictionary.apkg
18 | - Sync the LIFT/media into that fresh Anki deck, then re-create an updated .apkg file.
19 | At this point you could also re-share the .apkg file on its own, here:
20 | https://ankiweb.net/shared/decks/
21 |
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | FlashGrab (a.k.a. lexml2anki, SyncFromXml, syncxml)
2 | ==========
3 |
4 | Please see https://github.com/sillsdev/flashgrab/wiki for an overview.
5 |
6 | The "Sync one-way from XML" addon for Anki pulls data into Anki from one or more external XML sources, as
7 | determined by a config file. It does not modify fields that are specific to Anki (e.g. it does not reset any existing
8 | flashcards), but it WILL OVERWRITE any edits that you've made within Anki to the data that was pulled in. The
9 | assumption is that you want to do your editing elsewhere and simply pull the latest data into Anki.
10 |
11 | Note: if you'll be importing LIFT data, the first thing the auto-config does is import the
12 | .apkg file. (It is located in the samples folder next to this readme file; that is, in
13 | %userprofile%\My Documents\Anki\addons\syncxml\samples .) This will put the needed structures in place so that
14 | there are enough fields in the Anki deck to receive several different dictionary fields.
15 |
16 | After that, the wizard will try to auto-configure itself for your LIFT data.
17 |
18 | The rest of this readme file describes the details of the config file. For general documentation, see
19 | SyncFromXML_help.pdf instead. (You may also find the log file useful for troubleshooting during the initial
20 | configuration.)
21 |
22 | CONFIG DETAILS:
23 |
24 | The configuration (SyncFromXML_config.txt) file tells the addon:
25 | - where to find the XML file to pull its data from,
26 | - where precisely the data is located within that source file, and
27 | - where to store it in Anki.
28 |
29 | Before running the addon, try editing the config file by specifying the source file paths and carefully replacing the writing system codes between the " or {} marks. Also make sure that the correct model / note type (e.g. DICT_LIFT) is available in Anki (the easiest way to do this is to import the preconfigured sample deck), and make sure that 'deckname' refers to the Anki deck you wish to copy your data into. This should be enough to start pulling data in from your LIFT file.
30 |
31 |
32 | CONFIGURATION OPTIONS:
33 |
34 | source_file, source, source_field: To start pulling data into Anki, the source_file path needs to point to an XML file, and there needs to be a valid 'source' element containing at least one valid 'source_field' element. You can supply multiple source files if you wish, by specifying multiple 'source' elements. (And a single source file may be used more than once.)
35 |
36 | To load from the addon's 'samples' subfolder, just a short relative path will suffice for each path. Example:
37 |
38 | source_file="samples/Lindu-Test.lift"
39 | source_audio_folder="samples/audio"
40 | source_image_folder="samples/pictures" >
41 |
42 | But assuming your own source file is located elsewhere, you'll want to specify absolute paths. Example:
43 |
44 | source_file="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\Lindu-Test.lift"
45 | source_audio_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\audio"
46 | source_image_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\pictures" >
47 |
48 | audio_folder, image_folder: The base paths for finding any media files whose links are not already stored as absolute paths in the XML file.
49 |
50 | base_xpath: The base_xpath for each source matches on the root element that will correspond to one Anki note, and all source_field elements must be relative to that base path, representing the fields that will be copied over into that note. For example, given a LIFT file as input, the <entry> element would typically be specified as the root, and each source_field would point to something within <entry>, such as Lexeme Form, Gloss, Illustration, etc.
51 |
52 | grab: Typically, you should specify grab="first" for each field, in which case only the first matching value will be copied over. But for non-media fields, you may also specify grab="concat_all", in which case multiple values will be concatenated (separated by a separator such as ;;). In the sample config file, multiple glosses and multiple definitions are handled using concat_all.
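
As an illustration only (not the addon's own code), the two grab modes amount to something like this, given the list of strings that an xpath match produced:

    # Hypothetical sketch of grab="first" vs grab="concat_all"
    def apply_grab(values, grab="first", separator=";;"):
        if not values:
            return ""
        if grab == "first":
            return values[0]               # only the first match is copied over
        if grab == "concat_all":
            return separator.join(values)  # e.g. "rice;;cooked rice"
        raise ValueError("unknown grab mode: " + grab)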
53 |
54 | autodelete: This should be set to "false" since this feature is not yet implemented.
55 |
56 |
57 | ASSUMPTIONS AND LIMITATIONS:
58 | - Each sync operation will OVERWRITE the matching data in any matching Anki note. This is intentional, allowing you to do all of your editing externally rather than doing duplicate work in Anki.
59 | - Currently, *all* matching records are imported from the source file. If you want a filtered subset, you'll need to trim down the XML file. (E.g. in FLEx, you can choose to filter down to a desired subset before exporting to LIFT XML.)
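
If you do need to trim a LIFT file outside of FLEx, a rough standard-library sketch like the one below could drop unwanted entries before syncing. (The trait name "Difficulty" and the flat <entry>/<trait> layout are assumptions; check how your own custom field is actually exported.)

    # Hypothetical pre-filter: keep only entries carrying a given trait value
    import xml.etree.ElementTree as ET

    def trim_lift(src, dst, trait_name="Difficulty", trait_value="1"):
        tree = ET.parse(src)
        root = tree.getroot()
        for entry in list(root.findall("entry")):
            keep = any(t.get("name") == trait_name and t.get("value") == trait_value
                       for t in entry.findall("trait"))
            if not keep:
                root.remove(entry)
        tree.write(dst, encoding="utf-8", xml_declaration=True)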
60 |
61 |
62 | DEVELOPER NOTES:
63 | - Standard XPath syntax is used in this config file, except that quotes need to be escaped, either using the standard &quot;ws&quot; syntax, or using my non-standard {ws} syntax. (The addon supports the latter for convenience/readability.)
64 | - The XPath actually used by the sync software will consist of (a) the base_xpath, plus (b) the field's own xpath fragment, plus (c) descendant-or-self::text() . (See the sketch at the end of this file.)
65 | - Anything involving links would be hard to pull in cleanly, since these are stored as (long) IDs in LIFT. XPath has no way of
66 | knowing how to follow LIFT links. However, the first part of the ID is usually readable, so pulling the whole thing into
67 | an Anki field might be barely usable, though ugly, as in this example for synonyms. Ditto for other links (e.g. confer).
68 |
69 |
70 |
71 | A link to "absuwelto" would display like this: absuwelto_e50fee0e-45d3-43d3-bc71-b6cd4983607c
72 |
73 | Probably not worth it. And doing this for variants or complex forms specifically is even trickier, since distinguishing between
74 | them would require looking deeper down first, into the nested element, to decide whether or not its parent is a match.
75 |
76 | - The XPath syntax supported by this addon is all standard, with one tweak: since these xpaths are embedded here
77 | in XML elements, quotation marks should not be used directly. Rather than using &quot; every time, curly brackets are supported.
78 |
79 | - I think the following ought to work, but doesn't; the syntax for reading attributes instead of filtering on them
80 | seems poorly supported (though it works for the root element's guid attribute):
81 |
82 |
83 |
84 |
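A sketch of the xpath composition described above (illustration only: it uses lxml rather than the xpath library bundled with the addon, and the base_xpath, writing-system code, and file path are assumptions based on the sample data):

    # Hypothetical illustration of base_xpath + field xpath + descendant-or-self::text()
    from lxml import etree

    base_xpath  = "/lift/entry"                          # one Anki note per <entry>
    field_xpath = "lexical-unit/form[@lang={en}]/text"   # {en} stands in for "en"
    field_xpath = field_xpath.replace("{", '"').replace("}", '"')

    full_xpath = base_xpath + "/" + field_xpath + "/descendant-or-self::text()"
    doc = etree.parse("samples/Lindu-Test.lift")
    print([str(v) for v in doc.xpath(full_xpath)])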
--------------------------------------------------------------------------------
/User 1/collection.media/blank.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/User 1/collection.media/blank.txt
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Anki addon that performs a one-way sync, pulling data into Anki
3 | from the source XML file(s) as specified in the config file.
4 | """
5 |
6 | from syncxml import SyncFromXML
7 |
8 |
--------------------------------------------------------------------------------
/anki notes suggestions.txt:
--------------------------------------------------------------------------------
1 |
2 | I took a course in Second Language Acquisition as part of a degree in linguistics, and the primary strategy we were taught for learning new vocabulary was TPR (Total Physical Response). This generally involves pointing at something or making some physical movement as a correct response to what a native speaker says as part of a practice drill. Example: while looking at a grid of four (or more) photos, the trainer describes something about one of them, and the listener points to the correct photo.
3 |
4 | That is the approach that Rosetta Stone takes, and it is very effective. However, I think it could be much more effective in an SRS context such as Anki, where you don't have to repeat an entire lesson just to work on the one or two hard-to-memorize words in that lesson.
5 |
6 | In Anki, I think the trick would be to keep selecting four (or more) cards that all would 'work' in a given drill. In the example above, four cards that contained both audio and an image would do nicely; just one of the four audio clips would be played, and the user would click on the corresponding image. Here's a plain-text example: an English word is prominently displayed center-screen, and the user clicks on one of four tiles containing a foreign-language word, hopefully the one with that English meaning.
7 |
8 |
9 | C:\Documents and Settings\user57\Application Data\.anki\plugins
10 | D:\files\workspace\AnkiFromXML
11 | C:\Program Files\Anki\xml
12 | anki-2.0.9
13 |
14 | -----
15 |
16 | Okay, I found this page again (which had previously seemed unnecessary for plugin development):
17 | http://ankisrs.net/docs/WindowsDevEnvironment.html
18 |
19 | I guess I need to bite the bullet and go through all of those steps. But do I really need to use Python 2.6.x? I've already got 2.7.2 installed (as well as 3.2).
20 |
21 | Jon
22 |
23 | donate:
24 | Win Media Player plugin?
25 |
26 |
27 | creating Qt interfaces for Python apps:
28 | http://query7.com/learning-python-qt-programming-with-pyqt-pyside
29 | examples
30 | http://www.cs.usfca.edu/~afedosov/qttut/ (good basic tutorial)
31 | http://diotavelli.net/PyQtWiki/Creating_GUI_Applications_with_PyQt_and_Qt_Designer
32 |
33 | Reply to this thread?
34 | "it would be a lot easier if there was more documentation"
35 | http://groups.google.com/group/ankisrs/browse_thread/thread/9a4226454d629cb6?pli=1
36 |
37 |
38 |
39 | Where hooks are and aren't in Anki:
40 | http://groups.google.com/group/ankisrs/browse_thread/thread/41627e4fc4ab965a/953279aa7cefc3b6?lnk=gst&q=plugin&pli=1
41 |
42 | My thread:
43 | http://groups.google.com/group/ankisrs/browse_thread/thread/7cda32eff95a78ca/6caae97f05cc2767?lnk=gst&q=jon+c#
44 |
45 | Two posts where I mentioned my plans:
46 | http://groups.google.com/group/flex-list/browse_thread/thread/aaabc329bd542bc1#
47 | http://groups.google.com/group/flex-list/browse_thread/thread/97b9d6ef80a728e2/217716f873cf5a8c?lnk=gst&q=keyes#
48 |
49 | The LIFT list:
50 | http://groups.google.com/group/LexiconInterchangeFormat
51 | LIFTDotNet (some C# code for parsing LIFT):
52 | http://lift-standard.googlecode.com/svn/trunk/LIFTDotNet/
53 |
54 | Spanish deck note: "ALSO, because these cards were added at the end, the deck is now out of order.
55 | This isn't a major problem (just means you'll get all the errored cards at the end),
56 | but if you want to avoid this (recommended), make inactive tags 2000-10000 when you're
57 | on the first 1000, 3000-10000 when on the second 2000, etc."
58 |
59 | Other decks of interest:
60 | IPA
61 | Arabic alphabet
62 | U.S. state capitals w/ screenshots
63 | hi freq Hebrew vocab
64 | Greek
65 | US trees
66 | The Indonesian way
67 |
68 | Cards to add to lifedeck:
69 | Lindu team members
70 | Kulawi team members
71 | Kulawi kepala desa
72 | supporters
73 | Bethany/Calvary/WBC friends
74 | KD translator in Toraja
75 |
76 | plugins:
77 | - hello world
78 | - *For developers only: batch processing plugin example
79 | - search fields
80 | - tolerant spell-check
81 | - improved card search
82 | - display all the answers to a question (i.e. for all senses? homographs?)
83 | - bulk cloze
84 | - find homonyms/synonyms
85 | - read: Anki reset
86 | - mark on delete
87 |
88 |
89 | MODEL:
90 | lexeme form (prevent empty)
91 | hom num?
92 | variants?
93 | lexeme form audio
94 | meanings (concat of glosses and s.t. defs)
95 | defs?
96 | example text
97 | more examples?
98 | example audio
99 | image
100 |
101 | Use lxml because it supports XPath and auto-validation from an RNG file
102 | to strip formatting, mixed-WS stuff, etc. use this XPath:
103 | descendant-or-self::text()
104 | that will return a list aa, for which you can "".join(aa)
105 | http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/xpath.html
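
A minimal sketch of that tip (lxml plus illustrative LIFT element names):

    # Join the text fragments under each form, stripping span/formatting markup
    from lxml import etree

    doc = etree.parse("Lindu-Test.lift")
    for form in doc.xpath("//entry/lexical-unit/form"):
        aa = form.xpath("descendant-or-self::text()")  # list of text nodes
        print("".join(aa))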
106 |
107 | a-
108 |
109 | probably want to parse iteratively; here are some tips:
110 | http://www.ibm.com/developerworks/xml/library/x-hiperfparse/
111 |
112 | XPath notes...
113 | drink//price
114 | From the descendants of "drink", select all "price" elements.
115 | inventory/drink/pop/@supplier
116 | Selects the supplier attribute of pop
117 | amount/..
118 | This expression uses a relative path location and selects all of the amount elements in our XML document. It then uses the parent sequence ".." to select the parent element of each amount element. In our XML document, there are three amount elements, and their parents are lemonade, pop and chips
119 | inventory/snack/chips/*
120 | selects all children of chips
121 | inventory/*/*/*
122 | selects all great-grandchildren of inventory (I think)
123 | more: http://www.tizag.com/xmlTutorial/xpathbar.php
124 |
125 | ---------------------------------------
126 | PLUGIN DEVELOPMENT
127 |
128 | READ THIS:
129 | http://ankisrs.net/docs/dev/addons.html
130 |
131 |
132 | ---------------------------------------
133 | PLUGIN DEVELOPMENT - JON'S NOTES
134 |
135 | - Using File, Download, Shared Plugin, download a sample plugin.
136 | - Choose Settings, Plugins, Open Plugin Folder, and then open the plugin's code in a text editor.
137 | - Review it to see how it works and if it looks safe, then restart Anki and try out the new functionality.
138 | - Anki runs (imports? loads?) the plugin code once when you start Anki, so if you need to edit the plugin code, you'll need to save it and restart Anki in order for your changes to take effect.
139 | - To create your own plugin, just write some code and save it into that same folder. Or, copy the sample plugin's file and adapt it to your needs.
140 |
141 | SAMPLE PLUGINS
142 |
143 | "Hello World" is a very simple plugin that does one thing: During a review, it interposes a custom event handler that will pop up a "Hello world!" message box if the user pressed the "r" key. After checking for this "r" keystroke, it passes control back to the default event handler.
144 |
145 | "For developers only: batch processing plugin example" functions in a different context--it runs a process (which you need to write yourself) on every currently-selected fact in the Browser window. The filename ends in ".py.OFF" by default, so you'll need to delete the ".OFF" part before launching Anki, or else Anki can't load the plugin. And the plugin won't actually do anything to your selected facts until you write code where the commented-out section is. Proceed carefully! If you insert the following snippet of code there, it will append a period to the Front field in every selected fact.
146 | for fact in facts: fact["Front"] += "."
147 |
148 | API AND SOURCE CODE
149 |
150 | The plugins that are out there give practical examples of how to use the Anki API, but that API is not yet documented, so to really understand it, you may want to download the Anki source code, and look at the following modules, perhaps in the following order:
151 | -
152 | - ???
153 |
154 | Hint-peeking
155 | Image Download
156 | Learn Mode
157 | ?Interval Tweak
158 |
159 | consider also installing:
160 | Resize Images
161 | Speak Slower
162 | Custom Media Player 2 (needed for non-ASCII filenames?)
163 | ?Reset Creation Times
164 |
165 | v2:
166 | consider something like Real-Time Import for auto-update from FLEx/WeSay.
167 |
168 |
--------------------------------------------------------------------------------
/flashgrab-v0.8.7.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/flashgrab-v0.8.7.zip
--------------------------------------------------------------------------------
/icons/flashgrid-flashgrab-icons.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/icons/flashgrid-flashgrab-icons.png
--------------------------------------------------------------------------------
/icons/readme.txt:
--------------------------------------------------------------------------------
1 | These are some icon ideas I created a while back.
2 |
3 | I'm not sure how useful it would be to have these showing on our dialogs, etc.
4 |
5 | If so, it would be good to see whether the star shape impinges too much on the official Anki icon.
6 |
7 | -Jon
--------------------------------------------------------------------------------
/readme-developers.txt:
--------------------------------------------------------------------------------
1 | Here is some info on getting started developing this addon, or Anki in general.
2 |
3 | Much of it--but not all--is focused on setting up a Linux VM as the dev/test machine, although with a few manual steps dev can be done on Windows too.
4 |
5 | https://github.com/sillsdev/flashgrab/wiki/How-to-run-and-debug-Anki-in-a-Linux-virtual-machine-or-in-Windows
6 |
--------------------------------------------------------------------------------
/sample-src/Indonesian Vocab 2015-01-30 1445 tagged-in-notes.fwbackup:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/sample-src/Indonesian Vocab 2015-01-30 1445 tagged-in-notes.fwbackup
--------------------------------------------------------------------------------
/sample-src/Indonesian Vocab 2015-02-04 2149 updated media.fwbackup:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/sample-src/Indonesian Vocab 2015-02-04 2149 updated media.fwbackup
--------------------------------------------------------------------------------
/sample-src/readme-indo-sample.txt:
--------------------------------------------------------------------------------
1 | This is a backup of the master project in FLEx that was used
2 | for all the editing of this deck.
3 |
4 | It was
5 | - exported to LIFT,
6 | - synced one-way into Anki using FlashGrab, and then
7 | - the records in Anki were manually tagged based on the Notes (Comment) field.
8 |
--------------------------------------------------------------------------------
/syncx_local.py:
--------------------------------------------------------------------------------
1 | ''' A test script.
2 | @author: Jonathan Coombs, copyright sil.org 2012-2014
3 | '''
4 |
5 | #Force Python 3 syntax
6 |
7 |
8 |
9 | from syncxml import local_launch
10 | from syncxml import xml_util as X
11 | from syncxml import syncxml as SX
12 | from syncxml import logg as L
13 |
14 | import os
15 |
16 | def get_path(name):
17 | f = SX.get_first_lift_file(os.path.join("..", "tests", name))
18 | return f
19 |
20 | def get_lift_paths():
21 | L = os.listdir(os.path.join("..", "tests"))
22 | L2 = [get_path(x) for x in L]
23 | return L2
24 |
25 |
26 |
27 | def count_ws():
28 | pass
29 |
30 | def tests():
31 | """Some quick and dirty tests on auto-configuring and reading some actual LIFT files.
32 | Note: on my test machine, some of these have corresponding FLEx media folders, here:
33 | C:\ProgramData\SIL\FieldWorks\Projects\Bambara flashcards\LinkedFiles
34 | C:\ProgramData\SIL\FieldWorks\Projects\Moma-temp\LinkedFiles"""
35 | L.init(SX.LOG_FP, L.VERBOSITY_DEBUG)
36 | paths = get_lift_paths()
37 | #expected = dict()
38 | #expected['cgg'] =
39 | from time import sleep
40 | for p in paths:
41 | """ WAS:
42 | # analyze the WSes in this file
43 | size = os.path.getsize(p)
44 | L.debug("File {} is {} bytes".format(p, size))
45 | lift_few = X.get_lift_subset(p, 30) # .tmp
46 | langs = X.XmlLiftLangs(lift_few)
47 | v = langs.vern_ws
48 | n = langs.nat_ws
49 | os.remove(lift_few)
50 | """
51 | local_launch.main(p)
52 | sleep(0.5) # give Notepad a chance to open
53 |
54 | #count_ws()
55 | L.close_file()
56 |
57 |
58 | if (__name__ == '__main__'):
59 | # If we're here, we're not running inside Anki, but we can still test/debug various pieces...
60 | os.chdir('syncxml') # go down one level, so we're in the same relative position as the Anki add-on
61 |
62 | tests()
63 | local_launch.main()
64 |
--------------------------------------------------------------------------------
/syncxml/README.md:
--------------------------------------------------------------------------------
1 | FlashGrab (a.k.a. lexml2anki, SyncFromXml, etc.)
2 | ==========
3 |
4 | An Anki addon for pulling flashcard data (one-way sync) from XML. Optimized for LIFT XML (from WeSay or FLEx).
5 | See the Anki website's list of provided addons.
6 | https://ankiweb.net/shared/info/915685712
7 |
8 | ..
--------------------------------------------------------------------------------
/syncxml/SyncFromXML_config_default.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
11 |
12 |
13 |
14 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
71 |
72 |
--------------------------------------------------------------------------------
/syncxml/SyncFromXML_help.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/SyncFromXML_help.pdf
--------------------------------------------------------------------------------
/syncxml/SyncFromXML_log.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/SyncFromXML_log.txt
--------------------------------------------------------------------------------
/syncxml/SyncFromXML_readme.txt:
--------------------------------------------------------------------------------
1 | The "Sync one-way from XML" addon for Anki pulls data into Anki from one or more external XML sources, as
2 | determined by a config file. It does not modify fields that are specific to Anki (e.g. it does not reset any existing
3 | flashcards), but it WILL OVERWRITE any edits that you've made within Anki to the data that was pulled in. The
4 | assumption is that you want to do your editing elsewhere and simply pull the latest data into Anki.
5 |
6 | Note: if you'll be importing LIFT data, the first thing the auto-config does is import the
7 | .apkg file. (It is located in the samples folder next to this readme file; that is, in
8 | %userprofile%\My Documents\Anki\addons\syncxml\samples .) This will put the needed structures in place so that
9 | there are enough fields in the Anki deck to receive several different dictionary fields.
10 |
11 | After that, the wizard will try to auto-configure itself for your LIFT data.
12 |
13 | The rest of this readme file describes the details of the config file. For general documentation, see
14 | SyncFromXML_help.pdf instead. (You may also find the log file useful for troubleshooting during the initial
15 | configuration.)
16 |
17 | CONFIG DETAILS:
18 |
19 | The configuration (SyncFromXML_config.txt) file tells the addon:
20 | - where to find the XML file to pull its data from,
21 | - where precisely the data is located within that source file, and
22 | - where to store it in Anki.
23 |
24 | Before running the addon, try editing the config file by specifying the source file paths and carefully replacing the writing system codes between the " or {} marks. Also make sure that the correct model / note type (e.g. DICT_LIFT) is available in Anki (the easiest way to do this is to import the preconfigured sample deck), and make sure that 'deckname' refers to the Anki deck you wish to copy your data into. This should be enough to start pulling data in from your LIFT file.
25 |
26 |
27 | CONFIGURATION OPTIONS:
28 |
29 | source_file, source, source_field: To start pulling data into Anki, the source_file path needs to point to an XML file, and there needs to be a valid 'source' element containing at least one valid 'source_field' element. You can supply multiple source files if you wish, by specifying multiple 'source' elements. (And a single source file may be used more than once.)
30 |
31 | To load from the addon's 'samples' subfolder, just a short relative path will suffice for each path. Example:
32 | source_file="samples/Lindu-Test.lift"
33 | source_audio_folder="samples/audio"
34 | source_image_folder="samples/pictures" >
35 | But assuming your own source file is located elsewhere, you'll want to specify absolute paths. Example:
36 | source_file="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\Lindu-Test.lift"
37 | source_audio_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\audio"
38 | source_image_folder="D:\files\user7\documents\LIFT-from-FLEx\Lindu-Test\pictures" >
39 |
40 | audio_folder, image_folder: The base paths for finding any media files whose links are not already stored as absolute paths in the XML file.
41 |
42 | base_xpath: The base_xpath for each source matches on the root element that will correspond to one Anki note, and all source_field elements must be relative to that base path, representing the fields that will be copied over into that note. For example, given a LIFT file as input, the <entry> element would typically be specified as the root, and each source_field would point to something within <entry>, such as Lexeme Form, Gloss, Illustration, etc.
43 |
44 | grab: Typically, you should specify grab="first" for each field, in which case only the first matching value will be copied over. But for non-media fields, you may also specify grab="concat_all", in which case multiple values will be concatenated (separated by a separator such as ;;). In the sample config file, multiple glosses and multiple definitions are handled using concat_all.
45 |
46 | autodelete: This should be set to "false" since this feature is not yet implemented.
47 |
48 |
49 | ASSUMPTIONS AND LIMITATIONS:
50 | - Each sync operation will OVERWRITE the matching data in any matching Anki note. This is intentional, allowing you to do all of your editing externally rather than doing duplicate work in Anki.
51 | - Currently, *all* matching records are imported from the source file. If you want a filtered subset, you'll need to trim down the XML file. (E.g. in FLEx, you can choose to filter down to a desired subset before exporting to LIFT XML.)
52 |
53 |
54 | DEVELOPER NOTES:
55 | - Standard XPath syntax is used in this config file, except that quotes need to be escaped, either using the standard &quot;ws&quot; syntax, or using my non-standard {ws} syntax. (The addon supports the latter for convenience/readability.)
56 | - The XPath actually used by the sync software will consist of (a) the base_xpath, plus (b) the field's own xpath fragment, plus (c) descendant-or-self::text() .
57 | - Anything involving links would be hard to pull in cleanly, since these are stored as (long) IDs in LIFT. XPath has no way of
58 | knowing how to follow LIFT links. However, the first part of the ID is usually readable, so pulling the whole thing into
59 | an Anki field might be barely usable, though ugly, as in this example for synonyms. Ditto for other links (e.g. confer).
60 |
61 | A link to "absuwelto" would display like this: absuwelto_e50fee0e-45d3-43d3-bc71-b6cd4983607c
62 |
63 | Probably not worth it. And doing this for variants or complex forms specifically is even trickier, since distinguishing between
64 | them would require looking deeper down first, into the nested element, to decide whether or not its parent is a match.
65 |
66 | - The XPath syntax supported by this addon is all standard, with one tweak: since these xpaths are embedded here
67 | in XML elements, quotation marks should not be used directly. Rather than using &quot; every time, curly brackets are supported.
68 |
69 | - I think the following ought to work, but doesn't; the syntax for reading attributes instead of filtering on them
70 | seems poorly supported (though it works for the root element's guid attribute):
71 |
72 |
--------------------------------------------------------------------------------
/syncxml/__init__.py:
--------------------------------------------------------------------------------
1 | from syncxml import SyncFromXML
--------------------------------------------------------------------------------
/syncxml/anki_util.py:
--------------------------------------------------------------------------------
1 | '''
2 | Code for interacting with Anki. If running locally (outside Anki),
3 | very little will work, though it does fall back to local folders a bit.
4 | '''
5 |
6 | #Force Python 3 syntax
7 |
8 | import os, sys
9 |
10 | try:
11 | from aqt import mw
12 | IN_ANKI = True
13 | #anki_user_profile = mw.pm.profileFolder() #can't do this yet; mw is still null
14 | from anki.importing import AnkiPackageImporter
15 | except ImportError:
16 | IN_ANKI = False
17 | anki_user_profile = '../User 1' #'D:\\files\\user57\\documents\\Anki\\User 1'
18 |
19 | ANKI_MEDIA_FOLDER = 'collection.media'
20 |
21 | ADDON_FOLDER = 'syncxml'
22 | APKG_PATH = os.path.join("samples", "lift-dictionary.apkg")
23 | NO_MODEL_INSTR = "\nSteps: Restart Anki and make sure the target deck exists. Use Tools, Manage Note Types, or go to File Import, and import Anki/addons/syncxml/{}. Or, reconfigure.)".format(APKG_PATH)
24 | NO_MODEL_MSG = "The target model with the fields we needed was missing; have attempted to re-import the default APKG but an error occurred. \nPlease try again, or create it manually. \n{}".format(NO_MODEL_INSTR)
25 | TARGET_DECK = "lift-dictionary"
26 |
27 | def anki_model_exists(model):
28 | if (mw.col.models.byName(model)):
29 | return True
30 | return False
31 |
32 |
33 |
34 | def addons_folder():
35 | fld = ".."
36 | if IN_ANKI:
37 | fld = mw.pm.addonFolder()
38 | return fld
39 |
40 | def get_filepath(f):
41 | return os.path.join(addons_folder(), ADDON_FOLDER, f)
42 |
43 |
44 |
45 | def get_anki_fields(modelname): #(targ, source):
46 | """Given a model name, return a list/set of its fields."""
47 |
48 | n = ['Lexeme Form',
49 | 'Citation Form',
50 | 'Lex GUID',
51 | 'Lex Audio',
52 | 'Grammatical Info',
53 | 'Glosses',
54 | 'Definitions',
55 | 'Reversals',
56 | 'Picture',
57 | 'Example',
58 | 'Example Audio',
59 | 'Example Translation',
60 | 'xRxeversalxs']
61 | if not IN_ANKI:
62 | return n #return hard-coded value
63 |
64 | model = mw.col.models
65 | mdict = model.byName(modelname)
66 | n = [x['name'] for x in mdict['flds']]
67 | return n
68 |
69 | def add_field(key, modelname):
70 | model = mw.col.models
71 | mdict = model.byName(modelname)
72 | model.setCurrent(mdict)
73 | f = model.newField(key)
74 | #mw.col.models.flush()
75 | model.addField(mdict, f)
76 | model.save()
77 |
78 | def import_apkg_model(path, delete=False):
79 | #If delete==True, our sole purpose is to import the note types and flashcard templates, not the data.
80 |
81 | imp = AnkiPackageImporter(mw.col, path)
82 | imp.run()
83 | mw.col.models.flush()
84 | mw.reset(True) # refresh the screen so that the newly imported deck is visible
85 |
86 | if delete:
87 | # Delete ALL cards/notes in the LIFT deck. Ignore errors
88 | try:
89 | # deck = mw.col.decks.byName(TARGET_DECK) #hmm, this returns a Python dict(); how to get the deck object?
90 | # assert deck.cardCount() > 0
91 | # assert deck.noteCount() > 0
92 | ids = mw.col.findCards("deck:{}".format(TARGET_DECK))
93 | mw.col.remCards(ids, True)
94 | # assert deck.cardCount() == 0
95 | # assert deck.noteCount() == 0
96 | except:
97 | return "Failed to delete cards and notes after importing the APKG file."
98 |
99 | mw.col.models.flush()
100 | mw.reset(True)
101 | return ""
102 |
--------------------------------------------------------------------------------
/syncxml/docsrc/SyncFromXML_help.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/SyncFromXML_help.odt
--------------------------------------------------------------------------------
/syncxml/docsrc/checklog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/checklog.png
--------------------------------------------------------------------------------
/syncxml/docsrc/customtype.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/customtype.png
--------------------------------------------------------------------------------
/syncxml/docsrc/customtype_sm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/customtype_sm.png
--------------------------------------------------------------------------------
/syncxml/docsrc/flex_export.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/flex_export.png
--------------------------------------------------------------------------------
/syncxml/docsrc/menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/menu.png
--------------------------------------------------------------------------------
/syncxml/docsrc/report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/report.png
--------------------------------------------------------------------------------
/syncxml/docsrc/sync-report.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/sync-report.png
--------------------------------------------------------------------------------
/syncxml/docsrc/wizard-done.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard-done.png
--------------------------------------------------------------------------------
/syncxml/docsrc/wizard-examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard-examples.png
--------------------------------------------------------------------------------
/syncxml/docsrc/wizard1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard1.png
--------------------------------------------------------------------------------
/syncxml/docsrc/wizard2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/docsrc/wizard2.png
--------------------------------------------------------------------------------
/syncxml/file_util.py:
--------------------------------------------------------------------------------
1 | """distutils.file_util
2 |
3 | Utility functions for operating on single files.
4 | """
5 |
6 | __all__ = [
7 | # public symbols
8 | "copy_file",
9 | "move_file",
10 | "write_file",
11 | ]
12 |
13 | __revision__ = "$Id$"
14 |
15 | import os
16 | from distutils.errors import DistutilsFileError
17 | from distutils import log
18 |
19 | # for generating verbose output in 'copy_file()'
20 | _copy_action = {None: 'copying',
21 | 'hard': 'hard linking',
22 | 'sym': 'symbolically linking'}
23 |
24 |
25 | def _copy_file_contents(src, dst, buffer_size=16*1024):
26 | """Copy the file 'src' to 'dst'.
27 |
28 | Both must be filenames. Any error opening either file, reading from
29 | 'src', or writing to 'dst', raises DistutilsFileError. Data is
30 | read/written in chunks of 'buffer_size' bytes (default 16k). No attempt
31 | is made to handle anything apart from regular files.
32 | """
33 | # Stolen from shutil module in the standard library, but with
34 | # custom error-handling added.
35 | fsrc = None
36 | fdst = None
37 | try:
38 | try:
39 | fsrc = open(src, 'rb')
40 | except os.error as xxx_todo_changeme3:
41 | (errno, errstr) = xxx_todo_changeme3.args
42 | raise DistutilsFileError("could not open '%s': %s" % (src, errstr))
43 |
44 | if os.path.exists(dst):
45 | try:
46 | os.unlink(dst)
47 | except os.error as xxx_todo_changeme:
48 | (errno, errstr) = xxx_todo_changeme.args
49 | raise DistutilsFileError(
50 | "could not delete '%s': %s" % (dst, errstr))
51 |
52 | try:
53 | fdst = open(dst, 'wb')
54 | except os.error as xxx_todo_changeme4:
55 | (errno, errstr) = xxx_todo_changeme4.args
56 | raise DistutilsFileError(
57 | "could not create '%s': %s" % (dst, errstr))
58 |
59 | while 1:
60 | try:
61 | buf = fsrc.read(buffer_size)
62 | except os.error as xxx_todo_changeme1:
63 | (errno, errstr) = xxx_todo_changeme1.args
64 | raise DistutilsFileError(
65 | "could not read from '%s': %s" % (src, errstr))
66 |
67 | if not buf:
68 | break
69 |
70 | try:
71 | fdst.write(buf)
72 | except os.error as xxx_todo_changeme2:
73 | (errno, errstr) = xxx_todo_changeme2.args
74 | raise DistutilsFileError(
75 | "could not write to '%s': %s" % (dst, errstr))
76 |
77 | finally:
78 | if fdst:
79 | fdst.close()
80 | if fsrc:
81 | fsrc.close()
82 |
83 | def copy_file(src, dst, preserve_mode=1, preserve_times=1, update=0,
84 | link=None, verbose=1, dry_run=0):
85 | """Copy a file 'src' to 'dst'.
86 |
87 | If 'dst' is a directory, then 'src' is copied there with the same name;
88 | otherwise, it must be a filename. (If the file exists, it will be
89 | ruthlessly clobbered.) If 'preserve_mode' is true (the default),
90 | the file's mode (type and permission bits, or whatever is analogous on
91 | the current platform) is copied. If 'preserve_times' is true (the
92 | default), the last-modified and last-access times are copied as well.
93 | If 'update' is true, 'src' will only be copied if 'dst' does not exist,
94 | or if 'dst' does exist but is older than 'src'.
95 |
96 | 'link' allows you to make hard links (os.link) or symbolic links
97 | (os.symlink) instead of copying: set it to "hard" or "sym"; if it is
98 | None (the default), files are copied. Don't set 'link' on systems that
99 | don't support it: 'copy_file()' doesn't check if hard or symbolic
100 | linking is available.
101 |
102 | Under Mac OS, uses the native file copy function in macostools; on
103 | other systems, uses '_copy_file_contents()' to copy file contents.
104 |
105 | Return a tuple (dest_name, copied): 'dest_name' is the actual name of
106 | the output file, and 'copied' is true if the file was copied (or would
107 | have been copied, if 'dry_run' true).
108 | """
109 | # XXX if the destination file already exists, we clobber it if
110 | # copying, but blow up if linking. Hmmm. And I don't know what
111 | # macostools.copyfile() does. Should definitely be consistent, and
112 | # should probably blow up if destination exists and we would be
113 | # changing it (ie. it's not already a hard/soft link to src OR
114 | # (not update) and (src newer than dst).
115 |
116 | from distutils.dep_util import newer
117 | from stat import ST_ATIME, ST_MTIME, ST_MODE, S_IMODE
118 |
119 | if not os.path.isfile(src):
120 | raise DistutilsFileError(
121 | "can't copy '%s': doesn't exist or not a regular file" % src)
122 |
123 | if os.path.isdir(dst):
124 | dir = dst
125 | dst = os.path.join(dst, os.path.basename(src))
126 | else:
127 | dir = os.path.dirname(dst)
128 |
129 | if update and not newer(src, dst):
130 | if verbose >= 1:
131 | log.debug("not copying %s (output up-to-date)", src)
132 | return dst, 0
133 |
134 | try:
135 | action = _copy_action[link]
136 | except KeyError:
137 | raise ValueError("invalid value '%s' for 'link' argument" % link)
138 |
139 | if verbose >= 1:
140 | if os.path.basename(dst) == os.path.basename(src):
141 | log.info("%s %s -> %s", action, src, dir)
142 | else:
143 | log.info("%s %s -> %s", action, src, dst)
144 |
145 | if dry_run:
146 | return (dst, 1)
147 |
148 | # If linking (hard or symbolic), use the appropriate system call
149 | # (Unix only, of course, but that's the caller's responsibility)
150 | if link == 'hard':
151 | if not (os.path.exists(dst) and os.path.samefile(src, dst)):
152 | os.link(src, dst)
153 | elif link == 'sym':
154 | if not (os.path.exists(dst) and os.path.samefile(src, dst)):
155 | os.symlink(src, dst)
156 |
157 | # Otherwise (non-Mac, not linking), copy the file contents and
158 | # (optionally) copy the times and mode.
159 | else:
160 | _copy_file_contents(src, dst)
161 | if preserve_mode or preserve_times:
162 | st = os.stat(src)
163 |
164 | # According to David Ascher , utime() should be done
165 | # before chmod() (at least under NT).
166 | if preserve_times:
167 | os.utime(dst, (st[ST_ATIME], st[ST_MTIME]))
168 | if preserve_mode:
169 | os.chmod(dst, S_IMODE(st[ST_MODE]))
170 |
171 | return (dst, 1)
172 |
173 | # XXX I suspect this is Unix-specific -- need porting help!
174 | def move_file (src, dst, verbose=1, dry_run=0):
175 | """Move a file 'src' to 'dst'.
176 |
177 | If 'dst' is a directory, the file will be moved into it with the same
178 | name; otherwise, 'src' is just renamed to 'dst'. Return the new
179 | full name of the file.
180 |
181 | Handles cross-device moves on Unix using 'copy_file()'. What about
182 | other systems???
183 | """
184 | from os.path import exists, isfile, isdir, basename, dirname
185 | import errno
186 |
187 | if verbose >= 1:
188 | log.info("moving %s -> %s", src, dst)
189 |
190 | if dry_run:
191 | return dst
192 |
193 | if not isfile(src):
194 | raise DistutilsFileError("can't move '%s': not a regular file" % src)
195 |
196 | if isdir(dst):
197 | dst = os.path.join(dst, basename(src))
198 | elif exists(dst):
199 | raise DistutilsFileError(
200 | "can't move '%s': destination '%s' already exists" %
201 | (src, dst))
202 |
203 | if not isdir(dirname(dst)):
204 | raise DistutilsFileError(
205 | "can't move '%s': destination '%s' not a valid path" % \
206 | (src, dst))
207 |
208 | copy_it = 0
209 | try:
210 | os.rename(src, dst)
211 | except os.error as xxx_todo_changeme6:
212 | (num, msg) = xxx_todo_changeme6.args
213 | if num == errno.EXDEV:
214 | copy_it = 1
215 | else:
216 | raise DistutilsFileError(
217 | "couldn't move '%s' to '%s': %s" % (src, dst, msg))
218 |
219 | if copy_it:
220 | copy_file(src, dst, verbose=verbose)
221 | try:
222 | os.unlink(src)
223 | except os.error as xxx_todo_changeme5:
224 | (num, msg) = xxx_todo_changeme5.args
225 | try:
226 | os.unlink(dst)
227 | except os.error:
228 | pass
229 | raise DistutilsFileError(
230 | ("couldn't move '%s' to '%s' by copy/delete: " +
231 | "delete '%s' failed: %s") %
232 | (src, dst, src, msg))
233 | return dst
234 |
235 |
236 | def write_file (filename, contents):
237 | """Create a file with the specified name and write 'contents' (a
238 | sequence of strings without line terminators) to it.
239 | """
240 | f = open(filename, "w")
241 | try:
242 | for line in contents:
243 | f.write(line + "\n")
244 | finally:
245 | f.close()
246 |
--------------------------------------------------------------------------------
/syncxml/flex_util.py:
--------------------------------------------------------------------------------
1 | #Force Python 3 syntax
2 |
3 | from . import logg as L
4 | import os
5 |
6 | FLEX_DIR = r"C:\ProgramData\SIL\FieldWorks\Projects"
7 | FLEX_DIR_XP = r"C:\Documents and Settings\All Users\Application Data\SIL\FieldWorks\Projects"
8 | FLEX_DIR_LINUX = r"~/.local/share/fieldworks/Projects"
9 | LINKED_FILES = "LinkedFiles"
10 | FLEX_AUDIO = "AudioVisual"
11 | FLEX_IMAGE = "Pictures"
12 |
13 | def flex_dir():
14 | d = ""
15 | tmp = os.path.expanduser(FLEX_DIR_LINUX)
16 | if os.path.exists(tmp):
17 | d = tmp
18 | elif os.path.exists(FLEX_DIR): # Vista and later have priority
19 | d = FLEX_DIR
20 | elif os.path.exists(FLEX_DIR_XP):
21 | d = FLEX_DIR_XP
22 | return d
23 |
24 | def flex_media(f, dir=""):
25 |     r"""Given an absolute filepath such as c:\files\Catalan.lift, and the FLEx project folder,
26 | check whether audio and image folders are likely to be available under, say,
27 | C:\ProgramData\SIL\FieldWorks\Projects\Catalan\LinkedFiles
28 | Or, given just an absolute path, assume it's an fwdata file and deduce accordingly.
29 | """
30 | media_dir = ""
31 | if dir:
32 | base = os.path.split(f)[1] # e.g. just Catalan.lift
33 | proj = os.path.splitext(base)[0] # e.g. just Catalan
34 | media_dir = os.path.join(dir, proj, LINKED_FILES)
35 | else:
36 | # assume .fwdata
37 | dir, _f = os.path.split(f)
38 | media_dir = os.path.join(dir, LINKED_FILES)
39 |
40 | media_dir = os.path.normpath(media_dir) #fix any inconsistent slashes
41 | L.debug("Selected file is {}.\n Checking to see if media_dir {} exists...".format(f, media_dir))
42 | if os.path.exists(media_dir):
43 | # Simply assume both of the following do or will exist
44 | au = os.path.join(media_dir, FLEX_AUDIO)
45 | im = os.path.join(media_dir, FLEX_IMAGE)
46 | L.debug(" It does. choosing {} and {}".format(au, im))
47 | return (au, im)
48 | else:
49 | L.debug(" It does not exist.")
50 | return None
51 |
52 |
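
A short sketch of how flex_dir() and flex_media() fit together (the .lift path is hypothetical, and logg must be initialized first because flex_media() writes debug output):

    from syncxml import flex_util
    from syncxml import logg as L

    L.init("flex-demo-log.txt")                 # flex_media() calls L.debug()

    # LIFT export plus a detected FLEx projects folder:
    #   <projects>/Catalan/LinkedFiles/{AudioVisual, Pictures}
    # flex_dir() returns '' if no FieldWorks Projects folder is found.
    media = flex_util.flex_media(r"C:\files\Catalan.lift", flex_util.flex_dir())

    # Bare .fwdata path: LinkedFiles is assumed to sit next to the .fwdata file.
    media = media or flex_util.flex_media(r"C:\Projects\Catalan\Catalan.fwdata")

    if media:
        audio_dir, image_dir = media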
--------------------------------------------------------------------------------
/syncxml/glob.py:
--------------------------------------------------------------------------------
1 | """Filename globbing utility."""
2 |
3 | import sys
4 | import os
5 | import re
6 | import fnmatch
7 |
8 | try:
9 | _unicode = str
10 | except NameError:
11 | # If Python is built without Unicode support, the unicode type
12 | # will not exist. Fake one.
13 | class _unicode(object):
14 | pass
15 |
16 | __all__ = ["glob", "iglob"]
17 |
18 | def glob(pathname):
19 | """Return a list of paths matching a pathname pattern.
20 |
21 | The pattern may contain simple shell-style wildcards a la
22 | fnmatch. However, unlike fnmatch, filenames starting with a
23 | dot are special cases that are not matched by '*' and '?'
24 | patterns.
25 |
26 | """
27 | return list(iglob(pathname))
28 |
29 | def iglob(pathname):
30 | """Return an iterator which yields the paths matching a pathname pattern.
31 |
32 | The pattern may contain simple shell-style wildcards a la
33 | fnmatch. However, unlike fnmatch, filenames starting with a
34 | dot are special cases that are not matched by '*' and '?'
35 | patterns.
36 |
37 | """
38 | dirname, basename = os.path.split(pathname)
39 | if not has_magic(pathname):
40 | if basename:
41 | if os.path.lexists(pathname):
42 | yield pathname
43 | else:
44 | # Patterns ending with a slash should match only directories
45 | if os.path.isdir(dirname):
46 | yield pathname
47 | return
48 | if not dirname:
49 | for name in glob1(os.curdir, basename):
50 | yield name
51 | return
52 | # `os.path.split()` returns the argument itself as a dirname if it is a
53 | # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
54 | # contains magic characters (i.e. r'\\?\C:').
55 | if dirname != pathname and has_magic(dirname):
56 | dirs = iglob(dirname)
57 | else:
58 | dirs = [dirname]
59 | if has_magic(basename):
60 | glob_in_dir = glob1
61 | else:
62 | glob_in_dir = glob0
63 | for dirname in dirs:
64 | for name in glob_in_dir(dirname, basename):
65 | yield os.path.join(dirname, name)
66 |
67 | # These 2 helper functions non-recursively glob inside a literal directory.
68 | # They return a list of basenames. `glob1` accepts a pattern while `glob0`
69 | # takes a literal basename (so it only has to check for its existence).
70 |
71 | def glob1(dirname, pattern):
72 | if not dirname:
73 | dirname = os.curdir
74 | if isinstance(pattern, _unicode) and not isinstance(dirname, str):
75 | dirname = str(dirname, sys.getfilesystemencoding() or
76 | sys.getdefaultencoding())
77 | try:
78 | names = os.listdir(dirname)
79 | except os.error:
80 | return []
81 | if pattern[0] != '.':
82 | names = [x for x in names if x[0] != '.']
83 | return fnmatch.filter(names, pattern)
84 |
85 | def glob0(dirname, basename):
86 | if basename == '':
87 | # `os.path.split()` returns an empty basename for paths ending with a
88 | # directory separator. 'q*x/' should match only directories.
89 | if os.path.isdir(dirname):
90 | return [basename]
91 | else:
92 | if os.path.lexists(os.path.join(dirname, basename)):
93 | return [basename]
94 | return []
95 |
96 |
97 | magic_check = re.compile('[*?[]')
98 |
99 | def has_magic(s):
100 | return magic_check.search(s) is not None
101 |
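
A short usage sketch of this bundled globbing module, assuming it is run from the repository root (the sample paths exist in this repo):

    from syncxml import glob as globx

    wavs = globx.glob("syncxml/samples/audio/*.wav")      # eager: returns a list
    for path in globx.iglob("syncxml/samples/*/*.gif"):   # lazy: yields matches one at a time
        print(path)

    # Unlike plain fnmatch, names starting with '.' are never matched by '*' or '?'.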
--------------------------------------------------------------------------------
/syncxml/local_launch.py:
--------------------------------------------------------------------------------
1 | '''
2 | A test module. Does some minimal loading. Outside of Anki, we can't do much, but we can at least parse the input file.
3 | Needs to be imported from a script somewhere outside this package, with the osdir set to this one.
4 | (See syncx_local.)
5 |
6 | @author: Jonathan Coombs, copyright sil.org 2012-2014
7 | '''
8 |
9 | #Force Python 3 syntax
10 |
11 |
12 | from . import syncxml as SX
13 | from . import SyncFromXML as UI
14 | from . import logg as L
15 | import os
16 |
17 | def main(target=""):
18 | ''' Runs a full set of tests on the non-Anki side of things. '''
19 |
20 | #If we're here, we're not running inside Anki, but we can still test/debug various pieces...
21 | L.init(SX.LOG_FP, L.VERBOSITY_DEBUG)
22 | L.debug('\nRUNNING LOCALLY!\n')
23 |
24 | try:
25 | # full test
26 | if UI.reconfigure(target):
27 | cfpath = SX.get_config_path()
28 | if not os.path.exists(cfpath):
29 | ##log error
30 | return
31 |
32 | ##L.debug('Reconfigure was successful.') # So we know it's a success
33 | UI.try_sync()
34 | ##L.debug('Done--success.') # So we know it's a success
35 | else:
36 | pass
37 | ##L.debug('Done--failed.')
38 | finally:
39 | UI.launch_paths_maybe()
40 | #L.close_file()
41 |
42 |
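
A hedged sketch of what an external launcher along the lines of syncx_local.py might look like (the real launcher may differ); it only needs to make the package importable and call main():

    import os
    import sys

    # Make the repository root (where this launcher script is assumed to live)
    # importable so the 'syncxml' package can be found.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

    from syncxml import local_launch

    if __name__ == "__main__":
        local_launch.main()      # optionally: local_launch.main("path/to/target")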
--------------------------------------------------------------------------------
/syncxml/logg.py:
--------------------------------------------------------------------------------
1 | '''
2 | A simple logger.
3 |
4 | @author: Jonathan Coombs, copyright sil.org 2012-2014
5 | '''
6 |
7 | #Force Python 3 syntax
8 |
9 | import os
10 | import platform
11 |
12 | VERBOSITY_SILENT = 0
13 | VERBOSITY_MINIMAL = 1
14 | VERBOSITY_NORMAL = 2
15 | VERBOSITY_DEBUG = 3
16 | VERBOSITY = VERBOSITY_DEBUG # Normally, choose VERBOSITY_NORMAL or VERBOSITY_DEBUG; VERBOSITY_SILENT might possibly help with unicode-related crashes.
17 |
18 | # It's ok if client wants to set these differently, but remember it's a singleton
19 | LOG_PATH = ''
20 | LOG_FILE = 'logg.txt'
21 |
22 | __MAIN_LOG = None # Mimics a singleton by using the following module-level methods
23 | # Note that singletons aren't great, but ok for logging: http://googletesting.blogspot.com/2008/08/root-cause-of-singletons.html
24 | # Formerly I was doing something similar to this singleton: http://code.activestate.com/recipes/102263-logfilepy-a-singleton-log-file-creator/
25 |
26 | def close_file():
27 | ''' Call this before telling the user to check the log, so they can see it all.
28 | This releases the log file without blowing away its object's error count, etc.
29 | You cannot then write to the log again without first doing a reset() or init().'''
30 | if __MAIN_LOG:
31 | __MAIN_LOG.close_file()
32 |
33 | def init(fp=os.path.join(LOG_PATH, LOG_FILE), v=VERBOSITY):
34 | '''Must be called before you can start logging.'''
35 | global __MAIN_LOG
36 | close_file()
37 | __MAIN_LOG = Lg(fp, v)
38 |
39 | def reset():
40 | init(__MAIN_LOG.path, __MAIN_LOG.verbosity)
41 |
42 | #def close(): #Not needed? Just do init?
43 | # """Basically de-instantiates the singleton (in case the user might run the calling code again in the same session)"""
44 | # release()
45 | # __MAIN_LOG = None
46 |
47 | def get_main_log(): return __MAIN_LOG
48 |
49 | # convenience methods:
50 | def error(msg): __MAIN_LOG.error(msg)
51 | def warn(msg): __MAIN_LOG.warn(msg)
52 | def w(msg): __MAIN_LOG.w(msg)
53 | def debug(msg): __MAIN_LOG.debug(msg)
54 | def error_count(): return __MAIN_LOG.error_count()
55 | def warn_count(): return __MAIN_LOG.warn_count()
56 |
57 |
58 | class Lg:
59 | '''A log class that both prints to stdout and writes to a log file.
60 | This is not a singleton, so it could be used, say, for a secondary log.'''
61 |
62 | def __init__(self,fp=os.path.join(LOG_PATH, LOG_FILE), v=VERBOSITY):
63 | self.path = fp
64 | self.verbosity = v
65 | self._num_errors, self._num_warnings = 0, 0
66 |
67 | self.f = open(fp, 'w+b') #will silently overwrite the log file
68 | self._w('Logging (verbosity level {}). Python interpreter is version {}.'.format(v, platform.python_version()))
69 | self._w('You are encouraged to search this log for "MSG:" labels to review errors and warnings.')
70 | self._w('This log is *overwritten* each time the addon runs, unless you make a copy.\n')
71 |
72 | def _w(self, msg, label = ''):
73 | ''' Write to the log. (Rewrite this method if we ever upgrade to Python 3.) '''
74 | s = '{}{}\n'.format(label, msg)
75 | out = s.encode('utf-8') # don't crash the file output!
76 | self.f.write(out)
77 | s = s.encode('ASCII', 'replace') # don't crash the console!
78 | print(s)
79 |
80 | def w(self, msg, label=''):
81 | if self.verbosity >= VERBOSITY_NORMAL:
82 | self._w(msg, label)
83 |
84 | def debug(self, msg):
85 | if self.verbosity >= VERBOSITY_DEBUG:
86 | self._w(msg)
87 |
88 | def warn(self, msg):
89 | self._num_warnings += 1
90 | if self.verbosity > VERBOSITY_SILENT:
91 | self._w(msg, 'WARNING MSG: ')
92 |
93 | def error(self, msg):
94 | self._num_errors += 1
95 | if self.verbosity > VERBOSITY_SILENT:
96 | self._w(msg, 'ERROR MSG: ')
97 |
98 | def close_file(self):
99 | if self.f:
100 | self.f.close()
101 |
102 | def error_count(self): return self._num_errors
103 |
104 | def warn_count(self): return self._num_warnings
105 |
106 |
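
A minimal usage sketch of the module-level singleton API above (the log file name is arbitrary):

    from syncxml import logg as L

    L.init("sync-demo-log.txt", L.VERBOSITY_DEBUG)   # must be called before any logging
    L.debug("starting sync")                         # written only at VERBOSITY_DEBUG
    L.warn("audio file missing")                     # counted and labelled 'WARNING MSG: '
    L.error("could not open the LIFT file")          # counted and labelled 'ERROR MSG: '
    print(L.warn_count(), L.error_count())           # 1 1
    L.close_file()                                   # release the file before telling the user to open it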
--------------------------------------------------------------------------------
/syncxml/samples/Lindu-Test.lift:
--------------------------------------------------------------------------------
[The LIFT XML markup of this sample file did not survive extraction; only blank numbered lines and the bare gloss text remain. Recoverable gloss pairs: roof / atap, shoulder / bahu, body / tubuh, bald / gundul, clean / bersih.]
--------------------------------------------------------------------------------
/syncxml/samples/audio/634710572710000000nagali.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/audio/634710572710000000nagali.wav
--------------------------------------------------------------------------------
/syncxml/samples/audio/634710573956406250A LexExampleSentence.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/audio/634710573956406250A LexExampleSentence.wav
--------------------------------------------------------------------------------
/syncxml/samples/indonesian-lift-dictionary.apkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/indonesian-lift-dictionary.apkg
--------------------------------------------------------------------------------
/syncxml/samples/lift-dictionary.apkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/lift-dictionary.apkg
--------------------------------------------------------------------------------
/syncxml/samples/pictures/bersih.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/syncxml/samples/pictures/bersih.gif
--------------------------------------------------------------------------------
/tests/Moma/Moma.WeSayConfig:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
7 |
8 | LexEntry
9 | MultiText
10 | Word
11 | True
12 | EntryLexicalForm
13 | False
14 | False
15 | ZeroOr1
16 |
17 | Visible
18 |
19 | myl
20 | myl-Zxxx-x-audio
21 |
22 |
23 |
24 | LexEntry
25 | MultiText
26 | Citation Form
27 | False
28 | citation
29 | False
30 | False
31 | ZeroOr1
32 | NormallyHidden
33 |
34 | myl
35 |
36 |
37 |
38 | LexSense
39 | MultiText
40 | Definition (Meaning)
41 | True
42 | definition
43 | False
44 | True
45 | ZeroOr1
46 | Visible
47 |
48 | en
49 | en-Zxxx-x-audio
50 | es
51 | es-Zxxx-x-audio
52 |
53 |
54 |
55 | LexSense
56 | MultiText
57 | Gloss
58 | False
59 | gloss
60 | False
61 | True
62 | ZeroOr1
63 | NormallyHidden
64 |
65 | en
66 | en-Zxxx-x-audio
67 | es
68 | es-Zxxx-x-audio
69 |
70 |
71 |
72 | LexEntry
73 | MultiText
74 | Literal Meaning
75 | False
76 | literal-meaning
77 | False
78 | True
79 | ZeroOr1
80 | NormallyHidden
81 |
82 | en-Zxxx-x-audio
83 |
84 |
85 |
86 | PalasoDataObject
87 | MultiText
88 | Note
89 | False
90 | note
91 | True
92 | True
93 | ZeroOr1
94 | NormallyHidden
95 |
96 | en
97 |
98 |
99 |
100 | LexSense
101 | Picture
102 | Picture
103 | True
104 | Picture
105 | False
106 | False
107 | ZeroOr1
108 | NormallyHidden
109 |
110 | en-Zxxx-x-audio
111 |
112 |
113 |
114 | LexSense
115 | Option
116 | PartOfSpeech
117 | True
118 | POS
119 | False
120 | False
121 | ZeroOr1
122 | PartsOfSpeech.xml
123 | Visible
124 |
125 | en
126 |
127 |
128 |
129 | LexSense
130 | MultiText
131 | SILCAWL
132 | False
133 | SILCAWL
134 | True
135 | False
136 | ZeroOr1
137 | NormallyHidden
138 |
139 | en-Zxxx-x-audio
140 |
141 |
142 |
143 | LexExampleSentence
144 | MultiText
145 | Example Sentence
146 | True
147 | ExampleSentence
148 | False
149 | True
150 | ZeroOr1
151 |
152 | Visible
153 |
154 | myl
155 | myl-Zxxx-x-audio
156 |
157 |
158 |
159 | LexExampleSentence
160 | MultiText
161 | Example Translation
162 | True
163 | ExampleTranslation
164 | False
165 | True
166 | ZeroOr1
167 | Visible
168 |
169 | en
170 | en-Zxxx-x-audio
171 | es
172 | es-Zxxx-x-audio
173 |
174 |
175 |
176 | LexSense
177 | OptionCollection
178 | Sem Dom
179 | True
180 | semantic-domain-ddp4
181 | False
182 | False
183 | ZeroOr1
184 | Ddp4.xml
185 | NormallyHidden
186 |
187 | en
188 |
189 |
190 |
191 | LexEntry
192 | RelationToOneEntry
193 | Base Form
194 | False
195 | BaseForm
196 | False
197 | False
198 | ZeroOr1
199 | NormallyHidden
200 |
201 | myl
202 |
203 |
204 |
205 | LexEntry
206 | RelationToOneEntry
207 | Cross Reference
208 | False
209 | confer
210 | False
211 | False
212 | ZeroOrMore
213 | NormallyHidden
214 |
215 | myl
216 |
217 |
218 |
219 | Default View Template
220 |
221 |
222 |
223 |
226 |
229 |
232 |
233 | Add Meanings
234 | Add meanings (senses) to entries where they are missing.
235 | definition
236 | definition
237 | semantic-domain-ddp4
238 |
239 |
240 |
241 |
244 |
245 | Add Parts of Speech
246 | Add parts of speech to senses where they are missing.
247 | POS
248 | POS
249 | definition, ExampleSentence
250 |
251 |
252 |
253 |
256 |
257 | Add Example Sentences
258 | Add example sentences to senses where they are missing.
259 | ExampleSentence
260 | ExampleSentence
261 | definition
262 |
263 |
264 |
265 |
268 |
269 | Add Base Forms
270 | Identify the "base form" word that this word is built from. In the printed dictionary, the derived or variant words can optionally be shown as subentries of their base forms.
271 | BaseForm
272 | BaseForm
273 |
274 |
275 |
276 |
277 |
280 |
283 |
286 | SILCAWL
287 | en
288 |
289 |
292 | SILCAWL-MozambiqueAddendum
293 | en
294 |
295 |
298 | Ddp4Questions-en.xml
299 | False
300 |
301 |
302 |
303 |
306 |
307 |
--------------------------------------------------------------------------------
/tests/Moma/Moma.lift:
--------------------------------------------------------------------------------
[The XML content of this LIFT test fixture did not survive extraction; only blank numbered lines remain.]
--------------------------------------------------------------------------------
/tests/Moma/audio/adat-63531587437330.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/adat-63531587437330.wav
--------------------------------------------------------------------------------
/tests/Moma/audio/loka-63531587541460.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/loka-63531587541460.wav
--------------------------------------------------------------------------------
/tests/Moma/audio/loka-63531587545733.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/loka-63531587545733.wav
--------------------------------------------------------------------------------
/tests/Moma/audio/mata-63531587590792.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/mata-63531587590792.wav
--------------------------------------------------------------------------------
/tests/Moma/audio/mata-63531587597879.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/audio/mata-63531587597879.wav
--------------------------------------------------------------------------------
/tests/Moma/pictures/cultural law.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/pictures/cultural law.png
--------------------------------------------------------------------------------
/tests/Moma/pictures/sdd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sillsdev/flashgrab/2be228d9a4bf8cdbcf17d6412549b7c2071353d4/tests/Moma/pictures/sdd.png
--------------------------------------------------------------------------------
/this-is-an-eclipse-pydev-project.txt:
--------------------------------------------------------------------------------
1 | I.e. an Eclipse project (.project) that is specifically a Pydev project (.pydevproject).
--------------------------------------------------------------------------------
/xml/__init__.py:
--------------------------------------------------------------------------------
1 | """Core XML support for Python.
2 |
3 | This package contains four sub-packages:
4 |
5 | dom -- The W3C Document Object Model. This supports DOM Level 1 +
6 | Namespaces.
7 |
8 | parsers -- Python wrappers for XML parsers (currently only supports Expat).
9 |
10 | sax -- The Simple API for XML, developed by XML-Dev, led by David
11 | Megginson and ported to Python by Lars Marius Garshol. This
12 | supports the SAX 2 API.
13 |
14 | etree -- The ElementTree XML library. This is a subset of the full
15 | ElementTree XML release.
16 |
17 | """
18 |
19 |
20 | __all__ = ["dom", "parsers", "sax", "etree"]
21 |
--------------------------------------------------------------------------------
/xml/dom/NodeFilter.py:
--------------------------------------------------------------------------------
1 | # This is the Python mapping for interface NodeFilter from
2 | # DOM2-Traversal-Range. It contains only constants.
3 |
4 | class NodeFilter:
5 | """
6 | This is the DOM2 NodeFilter interface. It contains only constants.
7 | """
8 | FILTER_ACCEPT = 1
9 | FILTER_REJECT = 2
10 | FILTER_SKIP = 3
11 |
12 | SHOW_ALL = 0xFFFFFFFF
13 | SHOW_ELEMENT = 0x00000001
14 | SHOW_ATTRIBUTE = 0x00000002
15 | SHOW_TEXT = 0x00000004
16 | SHOW_CDATA_SECTION = 0x00000008
17 | SHOW_ENTITY_REFERENCE = 0x00000010
18 | SHOW_ENTITY = 0x00000020
19 | SHOW_PROCESSING_INSTRUCTION = 0x00000040
20 | SHOW_COMMENT = 0x00000080
21 | SHOW_DOCUMENT = 0x00000100
22 | SHOW_DOCUMENT_TYPE = 0x00000200
23 | SHOW_DOCUMENT_FRAGMENT = 0x00000400
24 | SHOW_NOTATION = 0x00000800
25 |
26 | def acceptNode(self, node):
27 | raise NotImplementedError
28 |
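
A tiny sketch of a concrete filter built on these constants; the traversal code that would call it lives elsewhere, and the class name is purely illustrative:

    from xml.dom.NodeFilter import NodeFilter

    class ElementsOnly(NodeFilter):
        # Accept element nodes, skip everything else.
        def acceptNode(self, node):
            if node.nodeType == 1:               # xml.dom.Node.ELEMENT_NODE
                return NodeFilter.FILTER_ACCEPT
            return NodeFilter.FILTER_SKIP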
--------------------------------------------------------------------------------
/xml/dom/__init__.py:
--------------------------------------------------------------------------------
1 | """W3C Document Object Model implementation for Python.
2 |
3 | The Python mapping of the Document Object Model is documented in the
4 | Python Library Reference in the section on the xml.dom package.
5 |
6 | This package contains the following modules:
7 |
8 | minidom -- A simple implementation of the Level 1 DOM with namespace
9 | support added (based on the Level 2 specification) and other
10 | minor Level 2 functionality.
11 |
12 | pulldom -- DOM builder supporting on-demand tree-building for selected
13 | subtrees of the document.
14 |
15 | """
16 |
17 |
18 | class Node:
19 | """Class giving the NodeType constants."""
20 | __slots__ = ()
21 |
22 | # DOM implementations may use this as a base class for their own
23 | # Node implementations. If they don't, the constants defined here
24 | # should still be used as the canonical definitions as they match
25 | # the values given in the W3C recommendation. Client code can
26 | # safely refer to these values in all tests of Node.nodeType
27 | # values.
28 |
29 | ELEMENT_NODE = 1
30 | ATTRIBUTE_NODE = 2
31 | TEXT_NODE = 3
32 | CDATA_SECTION_NODE = 4
33 | ENTITY_REFERENCE_NODE = 5
34 | ENTITY_NODE = 6
35 | PROCESSING_INSTRUCTION_NODE = 7
36 | COMMENT_NODE = 8
37 | DOCUMENT_NODE = 9
38 | DOCUMENT_TYPE_NODE = 10
39 | DOCUMENT_FRAGMENT_NODE = 11
40 | NOTATION_NODE = 12
41 |
42 |
43 | #ExceptionCode
44 | INDEX_SIZE_ERR = 1
45 | DOMSTRING_SIZE_ERR = 2
46 | HIERARCHY_REQUEST_ERR = 3
47 | WRONG_DOCUMENT_ERR = 4
48 | INVALID_CHARACTER_ERR = 5
49 | NO_DATA_ALLOWED_ERR = 6
50 | NO_MODIFICATION_ALLOWED_ERR = 7
51 | NOT_FOUND_ERR = 8
52 | NOT_SUPPORTED_ERR = 9
53 | INUSE_ATTRIBUTE_ERR = 10
54 | INVALID_STATE_ERR = 11
55 | SYNTAX_ERR = 12
56 | INVALID_MODIFICATION_ERR = 13
57 | NAMESPACE_ERR = 14
58 | INVALID_ACCESS_ERR = 15
59 | VALIDATION_ERR = 16
60 |
61 |
62 | class DOMException(Exception):
63 | """Abstract base class for DOM exceptions.
64 | Exceptions with specific codes are specializations of this class."""
65 |
66 | def __init__(self, *args, **kw):
67 | if self.__class__ is DOMException:
68 | raise RuntimeError(
69 | "DOMException should not be instantiated directly")
70 | Exception.__init__(self, *args, **kw)
71 |
72 | def _get_code(self):
73 | return self.code
74 |
75 |
76 | class IndexSizeErr(DOMException):
77 | code = INDEX_SIZE_ERR
78 |
79 | class DomstringSizeErr(DOMException):
80 | code = DOMSTRING_SIZE_ERR
81 |
82 | class HierarchyRequestErr(DOMException):
83 | code = HIERARCHY_REQUEST_ERR
84 |
85 | class WrongDocumentErr(DOMException):
86 | code = WRONG_DOCUMENT_ERR
87 |
88 | class InvalidCharacterErr(DOMException):
89 | code = INVALID_CHARACTER_ERR
90 |
91 | class NoDataAllowedErr(DOMException):
92 | code = NO_DATA_ALLOWED_ERR
93 |
94 | class NoModificationAllowedErr(DOMException):
95 | code = NO_MODIFICATION_ALLOWED_ERR
96 |
97 | class NotFoundErr(DOMException):
98 | code = NOT_FOUND_ERR
99 |
100 | class NotSupportedErr(DOMException):
101 | code = NOT_SUPPORTED_ERR
102 |
103 | class InuseAttributeErr(DOMException):
104 | code = INUSE_ATTRIBUTE_ERR
105 |
106 | class InvalidStateErr(DOMException):
107 | code = INVALID_STATE_ERR
108 |
109 | class SyntaxErr(DOMException):
110 | code = SYNTAX_ERR
111 |
112 | class InvalidModificationErr(DOMException):
113 | code = INVALID_MODIFICATION_ERR
114 |
115 | class NamespaceErr(DOMException):
116 | code = NAMESPACE_ERR
117 |
118 | class InvalidAccessErr(DOMException):
119 | code = INVALID_ACCESS_ERR
120 |
121 | class ValidationErr(DOMException):
122 | code = VALIDATION_ERR
123 |
124 | class UserDataHandler:
125 | """Class giving the operation constants for UserDataHandler.handle()."""
126 |
127 | # Based on DOM Level 3 (WD 9 April 2002)
128 |
129 | NODE_CLONED = 1
130 | NODE_IMPORTED = 2
131 | NODE_DELETED = 3
132 | NODE_RENAMED = 4
133 |
134 | XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
135 | XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
136 | XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
137 | EMPTY_NAMESPACE = None
138 | EMPTY_PREFIX = None
139 |
140 | from .domreg import getDOMImplementation, registerDOMImplementation
141 |
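
A minimal sketch showing the Node type constants in use with this package's minidom (the XML string is illustrative):

    import xml.dom
    from xml.dom import minidom

    doc = minidom.parseString("<entry><form lang='myl'>adat</form></entry>")
    form = doc.documentElement.firstChild
    assert form.nodeType == xml.dom.Node.ELEMENT_NODE
    for child in form.childNodes:
        if child.nodeType == xml.dom.Node.TEXT_NODE:
            print(child.data)                    # 'adat'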
--------------------------------------------------------------------------------
/xml/dom/domreg.py:
--------------------------------------------------------------------------------
1 | """Registration facilities for DOM. This module should not be used
2 | directly. Instead, the functions getDOMImplementation and
3 | registerDOMImplementation should be imported from xml.dom."""
4 |
5 | # This is a list of well-known implementations. Well-known names
6 | # should be published by posting to xml-sig@python.org, and are
7 | # subsequently recorded in this file.
8 |
9 | import sys
10 |
11 | well_known_implementations = {
12 | 'minidom':'xml.dom.minidom',
13 | '4DOM': 'xml.dom.DOMImplementation',
14 | }
15 |
16 | # DOM implementations not officially registered should register
17 | # themselves with their own names at runtime via registerDOMImplementation().
18 |
19 | registered = {}
20 |
21 | def registerDOMImplementation(name, factory):
22 | """registerDOMImplementation(name, factory)
23 |
24 | Register the factory function with the name. The factory function
25 | should return an object which implements the DOMImplementation
26 | interface. The factory function can either return the same object,
27 | or a new one (e.g. if that implementation supports some
28 | customization)."""
29 |
30 | registered[name] = factory
31 |
32 | def _good_enough(dom, features):
33 | "_good_enough(dom, features) -> Return 1 if the dom offers the features"
34 | for f,v in features:
35 | if not dom.hasFeature(f,v):
36 | return 0
37 | return 1
38 |
39 | def getDOMImplementation(name=None, features=()):
40 | """getDOMImplementation(name = None, features = ()) -> DOM implementation.
41 |
42 | Return a suitable DOM implementation. The name is either
43 | well-known, the module name of a DOM implementation, or None. If
44 | it is not None, imports the corresponding module and returns
45 | DOMImplementation object if the import succeeds.
46 |
47 | If name is not given, consider the available implementations to
48 | find one with the required feature set. If no implementation can
49 | be found, raise an ImportError. The features list must be a sequence
50 | of (feature, version) pairs which are passed to hasFeature."""
51 |
52 | import os
53 | creator = None
54 | mod = well_known_implementations.get(name)
55 | if mod:
56 | mod = __import__(mod, {}, {}, ['getDOMImplementation'])
57 | return mod.getDOMImplementation()
58 | elif name:
59 | return registered[name]()
60 | elif not sys.flags.ignore_environment and "PYTHON_DOM" in os.environ:
61 | return getDOMImplementation(name = os.environ["PYTHON_DOM"])
62 |
63 | # User did not specify a name, try implementations in arbitrary
64 | # order, returning the one that has the required features
65 | if isinstance(features, str):
66 | features = _parse_feature_string(features)
67 | for creator in registered.values():
68 | dom = creator()
69 | if _good_enough(dom, features):
70 | return dom
71 |
72 | for creator in well_known_implementations.keys():
73 | try:
74 | dom = getDOMImplementation(name = creator)
75 | except Exception: # typically ImportError, or AttributeError
76 | continue
77 | if _good_enough(dom, features):
78 | return dom
79 |
80 | raise ImportError("no suitable DOM implementation found")
81 |
82 | def _parse_feature_string(s):
83 | features = []
84 | parts = s.split()
85 | i = 0
86 | length = len(parts)
87 | while i < length:
88 | feature = parts[i]
89 | if feature[0] in "0123456789":
90 | raise ValueError("bad feature name: %r" % (feature,))
91 | i = i + 1
92 | version = None
93 | if i < length:
94 | v = parts[i]
95 | if v[0] in "0123456789":
96 | i = i + 1
97 | version = v
98 | features.append((feature, version))
99 | return tuple(features)
100 |
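
A short sketch of the two lookup paths getDOMImplementation() supports, by well-known name or by required (feature, version) pairs:

    from xml.dom import getDOMImplementation

    # 1. By well-known name.
    impl = getDOMImplementation("minidom")
    doc = impl.createDocument(None, "lift", None)

    # 2. By feature list: registered and well-known implementations are tried
    #    until one reports hasFeature(feature, version) for every pair.
    impl = getDOMImplementation(features=[("core", "2.0"), ("xml", "2.0")])
    print(impl.hasFeature("core", "2.0"))        # True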
--------------------------------------------------------------------------------
/xml/dom/minicompat.py:
--------------------------------------------------------------------------------
1 | """Python version compatibility support for minidom.
2 |
3 | This module contains internal implementation details and
4 | should not be imported; use xml.dom.minidom instead.
5 | """
6 |
7 | # This module should only be imported using "import *".
8 | #
9 | # The following names are defined:
10 | #
11 | # NodeList -- lightest possible NodeList implementation
12 | #
13 | # EmptyNodeList -- lightest possible NodeList that is guaranteed to
14 | # remain empty (immutable)
15 | #
16 | # StringTypes -- tuple of defined string types
17 | #
18 | # defproperty -- function used in conjunction with GetattrMagic;
19 | # using these together is needed to make them work
20 | # as efficiently as possible in both Python 2.2+
21 | # and older versions. For example:
22 | #
23 | # class MyClass(GetattrMagic):
24 | # def _get_myattr(self):
25 | # return something
26 | #
27 | # defproperty(MyClass, "myattr",
28 | # "return some value")
29 | #
30 | # For Python 2.2 and newer, this will construct a
31 | # property object on the class, which avoids
32 | # needing to override __getattr__(). It will only
33 | # work for read-only attributes.
34 | #
35 | # For older versions of Python, inheriting from
36 | # GetattrMagic will use the traditional
37 | # __getattr__() hackery to achieve the same effect,
38 | # but less efficiently.
39 | #
40 | # defproperty() should be used for each version of
41 | # the relevant _get_() function.
42 |
43 | __all__ = ["NodeList", "EmptyNodeList", "StringTypes", "defproperty"]
44 |
45 | import xml.dom
46 |
47 | StringTypes = (str,)
48 |
49 |
50 | class NodeList(list):
51 | __slots__ = ()
52 |
53 | def item(self, index):
54 | if 0 <= index < len(self):
55 | return self[index]
56 |
57 | def _get_length(self):
58 | return len(self)
59 |
60 | def _set_length(self, value):
61 | raise xml.dom.NoModificationAllowedErr(
62 | "attempt to modify read-only attribute 'length'")
63 |
64 | length = property(_get_length, _set_length,
65 | doc="The number of nodes in the NodeList.")
66 |
67 | # For backward compatibility
68 | def __setstate__(self, state):
69 | if state is None:
70 | state = []
71 | self[:] = state
72 |
73 |
74 | class EmptyNodeList(tuple):
75 | __slots__ = ()
76 |
77 | def __add__(self, other):
78 | NL = NodeList()
79 | NL.extend(other)
80 | return NL
81 |
82 | def __radd__(self, other):
83 | NL = NodeList()
84 | NL.extend(other)
85 | return NL
86 |
87 | def item(self, index):
88 | return None
89 |
90 | def _get_length(self):
91 | return 0
92 |
93 | def _set_length(self, value):
94 | raise xml.dom.NoModificationAllowedErr(
95 | "attempt to modify read-only attribute 'length'")
96 |
97 | length = property(_get_length, _set_length,
98 | doc="The number of nodes in the NodeList.")
99 |
100 |
101 | def defproperty(klass, name, doc):
102 | get = getattr(klass, ("_get_" + name))
103 | def set(self, value, name=name):
104 | raise xml.dom.NoModificationAllowedErr(
105 | "attempt to modify read-only attribute " + repr(name))
106 | assert not hasattr(klass, "_set_" + name), \
107 | "expected not to find _set_" + name
108 | prop = property(get, set, doc=doc)
109 | setattr(klass, name, prop)
110 |
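
A small sketch of how NodeList and defproperty() are meant to be used together; the Fragment class is made up for illustration:

    from xml.dom.minicompat import NodeList, defproperty

    class Fragment(object):
        def __init__(self, children):
            self.childNodes = NodeList(children)

        def _get_firstChild(self):
            return self.childNodes.item(0)

    # Installs a read-only 'firstChild' property backed by _get_firstChild().
    defproperty(Fragment, "firstChild", doc="First child node, or None.")

    frag = Fragment(["a", "b"])
    print(frag.firstChild, frag.childNodes.length)   # a 2
    # frag.firstChild = "x"  # would raise xml.dom.NoModificationAllowedErr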
--------------------------------------------------------------------------------
/xml/dom/pulldom.py:
--------------------------------------------------------------------------------
1 | import xml.sax
2 | import xml.sax.handler
3 |
4 | START_ELEMENT = "START_ELEMENT"
5 | END_ELEMENT = "END_ELEMENT"
6 | COMMENT = "COMMENT"
7 | START_DOCUMENT = "START_DOCUMENT"
8 | END_DOCUMENT = "END_DOCUMENT"
9 | PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
10 | IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
11 | CHARACTERS = "CHARACTERS"
12 |
13 | class PullDOM(xml.sax.ContentHandler):
14 | _locator = None
15 | document = None
16 |
17 | def __init__(self, documentFactory=None):
18 | from xml.dom import XML_NAMESPACE
19 | self.documentFactory = documentFactory
20 | self.firstEvent = [None, None]
21 | self.lastEvent = self.firstEvent
22 | self.elementStack = []
23 | self.push = self.elementStack.append
24 | try:
25 | self.pop = self.elementStack.pop
26 | except AttributeError:
27 | # use class' pop instead
28 | pass
29 | self._ns_contexts = [{XML_NAMESPACE:'xml'}] # contains uri -> prefix dicts
30 | self._current_context = self._ns_contexts[-1]
31 | self.pending_events = []
32 |
33 | def pop(self):
34 | result = self.elementStack[-1]
35 | del self.elementStack[-1]
36 | return result
37 |
38 | def setDocumentLocator(self, locator):
39 | self._locator = locator
40 |
41 | def startPrefixMapping(self, prefix, uri):
42 | if not hasattr(self, '_xmlns_attrs'):
43 | self._xmlns_attrs = []
44 | self._xmlns_attrs.append((prefix or 'xmlns', uri))
45 | self._ns_contexts.append(self._current_context.copy())
46 | self._current_context[uri] = prefix or None
47 |
48 | def endPrefixMapping(self, prefix):
49 | self._current_context = self._ns_contexts.pop()
50 |
51 | def startElementNS(self, name, tagName , attrs):
52 | # Retrieve xml namespace declaration attributes.
53 | xmlns_uri = 'http://www.w3.org/2000/xmlns/'
54 | xmlns_attrs = getattr(self, '_xmlns_attrs', None)
55 | if xmlns_attrs is not None:
56 | for aname, value in xmlns_attrs:
57 | attrs._attrs[(xmlns_uri, aname)] = value
58 | self._xmlns_attrs = []
59 | uri, localname = name
60 | if uri:
61 | # When using namespaces, the reader may or may not
62 | # provide us with the original name. If not, create
63 | # *a* valid tagName from the current context.
64 | if tagName is None:
65 | prefix = self._current_context[uri]
66 | if prefix:
67 | tagName = prefix + ":" + localname
68 | else:
69 | tagName = localname
70 | if self.document:
71 | node = self.document.createElementNS(uri, tagName)
72 | else:
73 | node = self.buildDocument(uri, tagName)
74 | else:
75 | # When the tagname is not prefixed, it just appears as
76 | # localname
77 | if self.document:
78 | node = self.document.createElement(localname)
79 | else:
80 | node = self.buildDocument(None, localname)
81 |
82 | for aname,value in attrs.items():
83 | a_uri, a_localname = aname
84 | if a_uri == xmlns_uri:
85 | if a_localname == 'xmlns':
86 | qname = a_localname
87 | else:
88 | qname = 'xmlns:' + a_localname
89 | attr = self.document.createAttributeNS(a_uri, qname)
90 | node.setAttributeNodeNS(attr)
91 | elif a_uri:
92 | prefix = self._current_context[a_uri]
93 | if prefix:
94 | qname = prefix + ":" + a_localname
95 | else:
96 | qname = a_localname
97 | attr = self.document.createAttributeNS(a_uri, qname)
98 | node.setAttributeNodeNS(attr)
99 | else:
100 | attr = self.document.createAttribute(a_localname)
101 | node.setAttributeNode(attr)
102 | attr.value = value
103 |
104 | self.lastEvent[1] = [(START_ELEMENT, node), None]
105 | self.lastEvent = self.lastEvent[1]
106 | self.push(node)
107 |
108 | def endElementNS(self, name, tagName):
109 | self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
110 | self.lastEvent = self.lastEvent[1]
111 |
112 | def startElement(self, name, attrs):
113 | if self.document:
114 | node = self.document.createElement(name)
115 | else:
116 | node = self.buildDocument(None, name)
117 |
118 | for aname,value in attrs.items():
119 | attr = self.document.createAttribute(aname)
120 | attr.value = value
121 | node.setAttributeNode(attr)
122 |
123 | self.lastEvent[1] = [(START_ELEMENT, node), None]
124 | self.lastEvent = self.lastEvent[1]
125 | self.push(node)
126 |
127 | def endElement(self, name):
128 | self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
129 | self.lastEvent = self.lastEvent[1]
130 |
131 | def comment(self, s):
132 | if self.document:
133 | node = self.document.createComment(s)
134 | self.lastEvent[1] = [(COMMENT, node), None]
135 | self.lastEvent = self.lastEvent[1]
136 | else:
137 | event = [(COMMENT, s), None]
138 | self.pending_events.append(event)
139 |
140 | def processingInstruction(self, target, data):
141 | if self.document:
142 | node = self.document.createProcessingInstruction(target, data)
143 | self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
144 | self.lastEvent = self.lastEvent[1]
145 | else:
146 | event = [(PROCESSING_INSTRUCTION, target, data), None]
147 | self.pending_events.append(event)
148 |
149 | def ignorableWhitespace(self, chars):
150 | node = self.document.createTextNode(chars)
151 | self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
152 | self.lastEvent = self.lastEvent[1]
153 |
154 | def characters(self, chars):
155 | node = self.document.createTextNode(chars)
156 | self.lastEvent[1] = [(CHARACTERS, node), None]
157 | self.lastEvent = self.lastEvent[1]
158 |
159 | def startDocument(self):
160 | if self.documentFactory is None:
161 | import xml.dom.minidom
162 | self.documentFactory = xml.dom.minidom.Document.implementation
163 |
164 | def buildDocument(self, uri, tagname):
165 | # Can't do that in startDocument, since we need the tagname
166 | # XXX: obtain DocumentType
167 | node = self.documentFactory.createDocument(uri, tagname, None)
168 | self.document = node
169 | self.lastEvent[1] = [(START_DOCUMENT, node), None]
170 | self.lastEvent = self.lastEvent[1]
171 | self.push(node)
172 | # Put everything we have seen so far into the document
173 | for e in self.pending_events:
174 | if e[0][0] == PROCESSING_INSTRUCTION:
175 | _,target,data = e[0]
176 | n = self.document.createProcessingInstruction(target, data)
177 | e[0] = (PROCESSING_INSTRUCTION, n)
178 | elif e[0][0] == COMMENT:
179 | n = self.document.createComment(e[0][1])
180 | e[0] = (COMMENT, n)
181 | else:
182 | raise AssertionError("Unknown pending event ",e[0][0])
183 | self.lastEvent[1] = e
184 | self.lastEvent = e
185 | self.pending_events = None
186 | return node.firstChild
187 |
188 | def endDocument(self):
189 | self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
190 | self.pop()
191 |
192 | def clear(self):
193 | "clear(): Explicitly release parsing structures"
194 | self.document = None
195 |
196 | class ErrorHandler:
197 | def warning(self, exception):
198 | print(exception)
199 | def error(self, exception):
200 | raise exception
201 | def fatalError(self, exception):
202 | raise exception
203 |
204 | class DOMEventStream:
205 | def __init__(self, stream, parser, bufsize):
206 | self.stream = stream
207 | self.parser = parser
208 | self.bufsize = bufsize
209 | if not hasattr(self.parser, 'feed'):
210 | self.getEvent = self._slurp
211 | self.reset()
212 |
213 | def reset(self):
214 | self.pulldom = PullDOM()
215 | # This content handler relies on namespace support
216 | self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
217 | self.parser.setContentHandler(self.pulldom)
218 |
219 | def __getitem__(self, pos):
220 | import warnings
221 | warnings.warn(
222 | "DOMEventStream's __getitem__ method ignores 'pos' parameter. "
223 | "Use iterator protocol instead.",
224 | DeprecationWarning,
225 | stacklevel=2
226 | )
227 | rc = self.getEvent()
228 | if rc:
229 | return rc
230 | raise IndexError
231 |
232 | def __next__(self):
233 | rc = self.getEvent()
234 | if rc:
235 | return rc
236 | raise StopIteration
237 |
238 | def __iter__(self):
239 | return self
240 |
241 | def expandNode(self, node):
242 | event = self.getEvent()
243 | parents = [node]
244 | while event:
245 | token, cur_node = event
246 | if cur_node is node:
247 | return
248 | if token != END_ELEMENT:
249 | parents[-1].appendChild(cur_node)
250 | if token == START_ELEMENT:
251 | parents.append(cur_node)
252 | elif token == END_ELEMENT:
253 | del parents[-1]
254 | event = self.getEvent()
255 |
256 | def getEvent(self):
257 | # use IncrementalParser interface, so we get the desired
258 | # pull effect
259 | if not self.pulldom.firstEvent[1]:
260 | self.pulldom.lastEvent = self.pulldom.firstEvent
261 | while not self.pulldom.firstEvent[1]:
262 | buf = self.stream.read(self.bufsize)
263 | if not buf:
264 | self.parser.close()
265 | return None
266 | self.parser.feed(buf)
267 | rc = self.pulldom.firstEvent[1][0]
268 | self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
269 | return rc
270 |
271 | def _slurp(self):
272 | """ Fallback replacement for getEvent() using the
273 | standard SAX2 interface, which means we slurp the
274 | SAX events into memory (no performance gain, but
275 | we are compatible to all SAX parsers).
276 | """
277 | self.parser.parse(self.stream)
278 | self.getEvent = self._emit
279 | return self._emit()
280 |
281 | def _emit(self):
282 | """ Fallback replacement for getEvent() that emits
283 | the events that _slurp() read previously.
284 | """
285 | rc = self.pulldom.firstEvent[1][0]
286 | self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
287 | return rc
288 |
289 | def clear(self):
290 | """clear(): Explicitly release parsing objects"""
291 | self.pulldom.clear()
292 | del self.pulldom
293 | self.parser = None
294 | self.stream = None
295 |
296 | class SAX2DOM(PullDOM):
297 |
298 | def startElementNS(self, name, tagName , attrs):
299 | PullDOM.startElementNS(self, name, tagName, attrs)
300 | curNode = self.elementStack[-1]
301 | parentNode = self.elementStack[-2]
302 | parentNode.appendChild(curNode)
303 |
304 | def startElement(self, name, attrs):
305 | PullDOM.startElement(self, name, attrs)
306 | curNode = self.elementStack[-1]
307 | parentNode = self.elementStack[-2]
308 | parentNode.appendChild(curNode)
309 |
310 | def processingInstruction(self, target, data):
311 | PullDOM.processingInstruction(self, target, data)
312 | node = self.lastEvent[0][1]
313 | parentNode = self.elementStack[-1]
314 | parentNode.appendChild(node)
315 |
316 | def ignorableWhitespace(self, chars):
317 | PullDOM.ignorableWhitespace(self, chars)
318 | node = self.lastEvent[0][1]
319 | parentNode = self.elementStack[-1]
320 | parentNode.appendChild(node)
321 |
322 | def characters(self, chars):
323 | PullDOM.characters(self, chars)
324 | node = self.lastEvent[0][1]
325 | parentNode = self.elementStack[-1]
326 | parentNode.appendChild(node)
327 |
328 |
329 | default_bufsize = (2 ** 14) - 20
330 |
331 | def parse(stream_or_string, parser=None, bufsize=None):
332 | if bufsize is None:
333 | bufsize = default_bufsize
334 | if isinstance(stream_or_string, str):
335 | stream = open(stream_or_string, 'rb')
336 | else:
337 | stream = stream_or_string
338 | if not parser:
339 | parser = xml.sax.make_parser()
340 | return DOMEventStream(stream, parser, bufsize)
341 |
342 | def parseString(string, parser=None):
343 | from io import StringIO
344 |
345 | bufsize = len(string)
346 | buf = StringIO(string)
347 | if not parser:
348 | parser = xml.sax.make_parser()
349 | return DOMEventStream(buf, parser, bufsize)
350 |
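
A minimal pull-parsing sketch against the module above; only the subtrees of interest are expanded into DOM nodes (the XML snippet is illustrative):

    from xml.dom import pulldom

    XML = "<lexicon><entry id='e1'><form>adat</form></entry><entry id='e2'/></lexicon>"

    events = pulldom.parseString(XML)
    for event, node in events:
        if event == pulldom.START_ELEMENT and node.tagName == "entry":
            events.expandNode(node)              # pull the rest of this element into the node
            print(node.getAttribute("id"), node.toxml())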
--------------------------------------------------------------------------------
/xml/dom/xmlbuilder.py:
--------------------------------------------------------------------------------
1 | """Implementation of the DOM Level 3 'LS-Load' feature."""
2 |
3 | import copy
4 | import warnings
5 | import xml.dom
6 |
7 | from xml.dom.NodeFilter import NodeFilter
8 |
9 |
10 | __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
11 |
12 |
13 | class Options:
14 | """Features object that has variables set for each DOMBuilder feature.
15 |
16 | The DOMBuilder class uses an instance of this class to pass settings to
17 | the ExpatBuilder class.
18 | """
19 |
20 | # Note that the DOMBuilder class in LoadSave constrains which of these
21 | # values can be set using the DOM Level 3 LoadSave feature.
22 |
23 | namespaces = 1
24 | namespace_declarations = True
25 | validation = False
26 | external_parameter_entities = True
27 | external_general_entities = True
28 | external_dtd_subset = True
29 | validate_if_schema = False
30 | validate = False
31 | datatype_normalization = False
32 | create_entity_ref_nodes = True
33 | entities = True
34 | whitespace_in_element_content = True
35 | cdata_sections = True
36 | comments = True
37 | charset_overrides_xml_encoding = True
38 | infoset = False
39 | supported_mediatypes_only = False
40 |
41 | errorHandler = None
42 | filter = None
43 |
44 |
45 | class DOMBuilder:
46 | entityResolver = None
47 | errorHandler = None
48 | filter = None
49 |
50 | ACTION_REPLACE = 1
51 | ACTION_APPEND_AS_CHILDREN = 2
52 | ACTION_INSERT_AFTER = 3
53 | ACTION_INSERT_BEFORE = 4
54 |
55 | _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
56 | ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
57 |
58 | def __init__(self):
59 | self._options = Options()
60 |
61 | def _get_entityResolver(self):
62 | return self.entityResolver
63 | def _set_entityResolver(self, entityResolver):
64 | self.entityResolver = entityResolver
65 |
66 | def _get_errorHandler(self):
67 | return self.errorHandler
68 | def _set_errorHandler(self, errorHandler):
69 | self.errorHandler = errorHandler
70 |
71 | def _get_filter(self):
72 | return self.filter
73 | def _set_filter(self, filter):
74 | self.filter = filter
75 |
76 | def setFeature(self, name, state):
77 | if self.supportsFeature(name):
78 | state = state and 1 or 0
79 | try:
80 | settings = self._settings[(_name_xform(name), state)]
81 | except KeyError:
82 | raise xml.dom.NotSupportedErr(
83 | "unsupported feature: %r" % (name,)) from None
84 | else:
85 | for name, value in settings:
86 | setattr(self._options, name, value)
87 | else:
88 | raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
89 |
90 | def supportsFeature(self, name):
91 | return hasattr(self._options, _name_xform(name))
92 |
93 | def canSetFeature(self, name, state):
94 | key = (_name_xform(name), state and 1 or 0)
95 | return key in self._settings
96 |
97 | # This dictionary maps from (feature,value) to a list of
98 | # (option,value) pairs that should be set on the Options object.
99 | # If a (feature,value) setting is not in this dictionary, it is
100 | # not supported by the DOMBuilder.
101 | #
102 | _settings = {
103 | ("namespace_declarations", 0): [
104 | ("namespace_declarations", 0)],
105 | ("namespace_declarations", 1): [
106 | ("namespace_declarations", 1)],
107 | ("validation", 0): [
108 | ("validation", 0)],
109 | ("external_general_entities", 0): [
110 | ("external_general_entities", 0)],
111 | ("external_general_entities", 1): [
112 | ("external_general_entities", 1)],
113 | ("external_parameter_entities", 0): [
114 | ("external_parameter_entities", 0)],
115 | ("external_parameter_entities", 1): [
116 | ("external_parameter_entities", 1)],
117 | ("validate_if_schema", 0): [
118 | ("validate_if_schema", 0)],
119 | ("create_entity_ref_nodes", 0): [
120 | ("create_entity_ref_nodes", 0)],
121 | ("create_entity_ref_nodes", 1): [
122 | ("create_entity_ref_nodes", 1)],
123 | ("entities", 0): [
124 | ("create_entity_ref_nodes", 0),
125 | ("entities", 0)],
126 | ("entities", 1): [
127 | ("entities", 1)],
128 | ("whitespace_in_element_content", 0): [
129 | ("whitespace_in_element_content", 0)],
130 | ("whitespace_in_element_content", 1): [
131 | ("whitespace_in_element_content", 1)],
132 | ("cdata_sections", 0): [
133 | ("cdata_sections", 0)],
134 | ("cdata_sections", 1): [
135 | ("cdata_sections", 1)],
136 | ("comments", 0): [
137 | ("comments", 0)],
138 | ("comments", 1): [
139 | ("comments", 1)],
140 | ("charset_overrides_xml_encoding", 0): [
141 | ("charset_overrides_xml_encoding", 0)],
142 | ("charset_overrides_xml_encoding", 1): [
143 | ("charset_overrides_xml_encoding", 1)],
144 | ("infoset", 0): [],
145 | ("infoset", 1): [
146 | ("namespace_declarations", 0),
147 | ("validate_if_schema", 0),
148 | ("create_entity_ref_nodes", 0),
149 | ("entities", 0),
150 | ("cdata_sections", 0),
151 | ("datatype_normalization", 1),
152 | ("whitespace_in_element_content", 1),
153 | ("comments", 1),
154 | ("charset_overrides_xml_encoding", 1)],
155 | ("supported_mediatypes_only", 0): [
156 | ("supported_mediatypes_only", 0)],
157 | ("namespaces", 0): [
158 | ("namespaces", 0)],
159 | ("namespaces", 1): [
160 | ("namespaces", 1)],
161 | }
162 |
163 | def getFeature(self, name):
164 | xname = _name_xform(name)
165 | try:
166 | return getattr(self._options, xname)
167 | except AttributeError:
168 | if name == "infoset":
169 | options = self._options
170 | return (options.datatype_normalization
171 | and options.whitespace_in_element_content
172 | and options.comments
173 | and options.charset_overrides_xml_encoding
174 | and not (options.namespace_declarations
175 | or options.validate_if_schema
176 | or options.create_entity_ref_nodes
177 | or options.entities
178 | or options.cdata_sections))
179 | raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
180 |
181 | def parseURI(self, uri):
182 | if self.entityResolver:
183 | input = self.entityResolver.resolveEntity(None, uri)
184 | else:
185 | input = DOMEntityResolver().resolveEntity(None, uri)
186 | return self.parse(input)
187 |
188 | def parse(self, input):
189 | options = copy.copy(self._options)
190 | options.filter = self.filter
191 | options.errorHandler = self.errorHandler
192 | fp = input.byteStream
193 | if fp is None and options.systemId:
194 | import urllib.request
195 | fp = urllib.request.urlopen(input.systemId)
196 | return self._parse_bytestream(fp, options)
197 |
198 | def parseWithContext(self, input, cnode, action):
199 | if action not in self._legal_actions:
200 | raise ValueError("not a legal action")
201 | raise NotImplementedError("Haven't written this yet...")
202 |
203 | def _parse_bytestream(self, stream, options):
204 | import xml.dom.expatbuilder
205 | builder = xml.dom.expatbuilder.makeBuilder(options)
206 | return builder.parseFile(stream)
207 |
208 |
209 | def _name_xform(name):
210 | return name.lower().replace('-', '_')
211 |
212 |
213 | class DOMEntityResolver(object):
214 | __slots__ = '_opener',
215 |
216 | def resolveEntity(self, publicId, systemId):
217 | assert systemId is not None
218 | source = DOMInputSource()
219 | source.publicId = publicId
220 | source.systemId = systemId
221 | source.byteStream = self._get_opener().open(systemId)
222 |
223 | # determine the encoding if the transport provided it
224 | source.encoding = self._guess_media_encoding(source)
225 |
226 |         # determine the base URI if we can
227 | import posixpath, urllib.parse
228 | parts = urllib.parse.urlparse(systemId)
229 | scheme, netloc, path, params, query, fragment = parts
230 | # XXX should we check the scheme here as well?
231 | if path and not path.endswith("/"):
232 | path = posixpath.dirname(path) + "/"
233 | parts = scheme, netloc, path, params, query, fragment
234 | source.baseURI = urllib.parse.urlunparse(parts)
235 |
236 | return source
237 |
238 | def _get_opener(self):
239 | try:
240 | return self._opener
241 | except AttributeError:
242 | self._opener = self._create_opener()
243 | return self._opener
244 |
245 | def _create_opener(self):
246 | import urllib.request
247 | return urllib.request.build_opener()
248 |
249 | def _guess_media_encoding(self, source):
250 | info = source.byteStream.info()
251 | if "Content-Type" in info:
252 | for param in info.getplist():
253 | if param.startswith("charset="):
254 | return param.split("=", 1)[1].lower()
255 |
256 |
257 | class DOMInputSource(object):
258 | __slots__ = ('byteStream', 'characterStream', 'stringData',
259 | 'encoding', 'publicId', 'systemId', 'baseURI')
260 |
261 | def __init__(self):
262 | self.byteStream = None
263 | self.characterStream = None
264 | self.stringData = None
265 | self.encoding = None
266 | self.publicId = None
267 | self.systemId = None
268 | self.baseURI = None
269 |
270 | def _get_byteStream(self):
271 | return self.byteStream
272 | def _set_byteStream(self, byteStream):
273 | self.byteStream = byteStream
274 |
275 | def _get_characterStream(self):
276 | return self.characterStream
277 | def _set_characterStream(self, characterStream):
278 | self.characterStream = characterStream
279 |
280 | def _get_stringData(self):
281 | return self.stringData
282 | def _set_stringData(self, data):
283 | self.stringData = data
284 |
285 | def _get_encoding(self):
286 | return self.encoding
287 | def _set_encoding(self, encoding):
288 | self.encoding = encoding
289 |
290 | def _get_publicId(self):
291 | return self.publicId
292 | def _set_publicId(self, publicId):
293 | self.publicId = publicId
294 |
295 | def _get_systemId(self):
296 | return self.systemId
297 | def _set_systemId(self, systemId):
298 | self.systemId = systemId
299 |
300 | def _get_baseURI(self):
301 | return self.baseURI
302 | def _set_baseURI(self, uri):
303 | self.baseURI = uri
304 |
305 |
306 | class DOMBuilderFilter:
307 | """Element filter which can be used to tailor construction of
308 | a DOM instance.
309 | """
310 |
311 | # There's really no need for this class; concrete implementations
312 | # should just implement the endElement() and startElement()
313 | # methods as appropriate. Using this makes it easy to only
314 | # implement one of them.
315 |
316 | FILTER_ACCEPT = 1
317 | FILTER_REJECT = 2
318 | FILTER_SKIP = 3
319 | FILTER_INTERRUPT = 4
320 |
321 | whatToShow = NodeFilter.SHOW_ALL
322 |
323 | def _get_whatToShow(self):
324 | return self.whatToShow
325 |
326 | def acceptNode(self, element):
327 | return self.FILTER_ACCEPT
328 |
329 | def startContainer(self, element):
330 | return self.FILTER_ACCEPT
331 |
332 | del NodeFilter
333 |
334 |
335 | class DocumentLS:
336 | """Mixin to create documents that conform to the load/save spec."""
337 |
338 | async_ = False
339 |
340 | def _get_async(self):
341 | return False
342 |
343 | def _set_async(self, flag):
344 | if flag:
345 | raise xml.dom.NotSupportedErr(
346 | "asynchronous document loading is not supported")
347 |
348 | def abort(self):
349 | # What does it mean to "clear" a document? Does the
350 | # documentElement disappear?
351 | raise NotImplementedError(
352 | "haven't figured out what this means yet")
353 |
354 | def load(self, uri):
355 | raise NotImplementedError("haven't written this yet")
356 |
357 | def loadXML(self, source):
358 | raise NotImplementedError("haven't written this yet")
359 |
360 | def saveXML(self, snode):
361 | if snode is None:
362 | snode = self
363 | elif snode.ownerDocument is not self:
364 | raise xml.dom.WrongDocumentErr()
365 | return snode.toxml()
366 |
367 |
368 | class DOMImplementationLS:
369 | MODE_SYNCHRONOUS = 1
370 | MODE_ASYNCHRONOUS = 2
371 |
372 | def createDOMBuilder(self, mode, schemaType):
373 | if schemaType is not None:
374 | raise xml.dom.NotSupportedErr(
375 | "schemaType not yet supported")
376 | if mode == self.MODE_SYNCHRONOUS:
377 | return DOMBuilder()
378 | if mode == self.MODE_ASYNCHRONOUS:
379 | raise xml.dom.NotSupportedErr(
380 | "asynchronous builders are not supported")
381 | raise ValueError("unknown value for mode")
382 |
383 | def createDOMWriter(self):
384 | raise NotImplementedError(
385 | "the writer interface hasn't been written yet!")
386 |
387 | def createDOMInputSource(self):
388 | return DOMInputSource()
389 |
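
A minimal usage sketch of the classes above, not part of the repository files. It assumes xml.dom.minidom, whose DOMImplementation mixes in DOMImplementationLS; "sample.xml" is a placeholder file name.

    # Sketch: drive DOMImplementationLS/DOMBuilder/DOMInputSource via minidom.
    # "sample.xml" is a placeholder path, not a file from this project.
    from xml.dom.minidom import getDOMImplementation

    impl = getDOMImplementation()
    builder = impl.createDOMBuilder(impl.MODE_SYNCHRONOUS, None)

    source = impl.createDOMInputSource()          # DOMInputSource defined above
    source.byteStream = open("sample.xml", "rb")  # DOMBuilder.parse() reads byteStream

    doc = builder.parse(source)                   # returns a minidom Document
    print(doc.documentElement.tagName)
    source.byteStream.close()
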
--------------------------------------------------------------------------------
/xml/etree/ElementInclude.py:
--------------------------------------------------------------------------------
1 | #
2 | # ElementTree
3 | # $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
4 | #
5 | # limited xinclude support for element trees
6 | #
7 | # history:
8 | # 2003-08-15 fl created
9 | # 2003-11-14 fl fixed default loader
10 | #
11 | # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
12 | #
13 | # fredrik@pythonware.com
14 | # http://www.pythonware.com
15 | #
16 | # --------------------------------------------------------------------
17 | # The ElementTree toolkit is
18 | #
19 | # Copyright (c) 1999-2008 by Fredrik Lundh
20 | #
21 | # By obtaining, using, and/or copying this software and/or its
22 | # associated documentation, you agree that you have read, understood,
23 | # and will comply with the following terms and conditions:
24 | #
25 | # Permission to use, copy, modify, and distribute this software and
26 | # its associated documentation for any purpose and without fee is
27 | # hereby granted, provided that the above copyright notice appears in
28 | # all copies, and that both that copyright notice and this permission
29 | # notice appear in supporting documentation, and that the name of
30 | # Secret Labs AB or the author not be used in advertising or publicity
31 | # pertaining to distribution of the software without specific, written
32 | # prior permission.
33 | #
34 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41 | # OF THIS SOFTWARE.
42 | # --------------------------------------------------------------------
43 |
44 | # Licensed to PSF under a Contributor Agreement.
45 | # See http://www.python.org/psf/license for licensing details.
46 |
47 | ##
48 | # Limited XInclude support for the ElementTree package.
49 | ##
50 |
51 | import copy
52 | from . import ElementTree
53 |
54 | XINCLUDE = "{http://www.w3.org/2001/XInclude}"
55 |
56 | XINCLUDE_INCLUDE = XINCLUDE + "include"
57 | XINCLUDE_FALLBACK = XINCLUDE + "fallback"
58 |
59 | ##
60 | # Fatal include error.
61 |
62 | class FatalIncludeError(SyntaxError):
63 | pass
64 |
65 | ##
66 | # Default loader. This loader reads an included resource from disk.
67 | #
68 | # @param href Resource reference.
69 | # @param parse Parse mode. Either "xml" or "text".
70 | # @param encoding Optional text encoding (UTF-8 by default for "text").
71 | # @return The expanded resource. If the parse mode is "xml", this
72 | # is an ElementTree instance. If the parse mode is "text", this
73 | # is a Unicode string. If the loader fails, it can return None
74 | # or raise an OSError exception.
75 | # @throws OSError If the loader fails to load the resource.
76 |
77 | def default_loader(href, parse, encoding=None):
78 | if parse == "xml":
79 | with open(href, 'rb') as file:
80 | data = ElementTree.parse(file).getroot()
81 | else:
82 | if not encoding:
83 | encoding = 'UTF-8'
84 | with open(href, 'r', encoding=encoding) as file:
85 | data = file.read()
86 | return data
87 |
88 | ##
89 | # Expand XInclude directives.
90 | #
91 | # @param elem Root element.
92 | # @param loader Optional resource loader. If omitted, it defaults
93 | # to {@link default_loader}. If given, it should be a callable
94 | # that implements the same interface as default_loader.
95 | # @throws FatalIncludeError If the function fails to include a given
96 | # resource, or if the tree contains malformed XInclude elements.
97 | # @throws OSError If the function fails to load a given resource.
98 |
99 | def include(elem, loader=None):
100 | if loader is None:
101 | loader = default_loader
102 | # look for xinclude elements
103 | i = 0
104 | while i < len(elem):
105 | e = elem[i]
106 | if e.tag == XINCLUDE_INCLUDE:
107 | # process xinclude directive
108 | href = e.get("href")
109 | parse = e.get("parse", "xml")
110 | if parse == "xml":
111 | node = loader(href, parse)
112 | if node is None:
113 | raise FatalIncludeError(
114 | "cannot load %r as %r" % (href, parse)
115 | )
116 | node = copy.copy(node)
117 | if e.tail:
118 | node.tail = (node.tail or "") + e.tail
119 | elem[i] = node
120 | elif parse == "text":
121 | text = loader(href, parse, e.get("encoding"))
122 | if text is None:
123 | raise FatalIncludeError(
124 | "cannot load %r as %r" % (href, parse)
125 | )
126 | if i:
127 | node = elem[i-1]
128 | node.tail = (node.tail or "") + text + (e.tail or "")
129 | else:
130 | elem.text = (elem.text or "") + text + (e.tail or "")
131 | del elem[i]
132 | continue
133 | else:
134 | raise FatalIncludeError(
135 | "unknown parse type in xi:include tag (%r)" % parse
136 | )
137 | elif e.tag == XINCLUDE_FALLBACK:
138 | raise FatalIncludeError(
139 | "xi:fallback tag must be child of xi:include (%r)" % e.tag
140 | )
141 | else:
142 | include(e, loader)
143 | i = i + 1
144 |
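
A short usage sketch, not part of the file above: expanding xi:include directives in a parsed tree with the default loader. "document.xml" and "expanded.xml" are placeholder file names; hrefs are opened relative to the current working directory by default_loader.

    # Sketch: expand XInclude directives in place, then write the result.
    from xml.etree import ElementTree, ElementInclude

    tree = ElementTree.parse("document.xml")          # placeholder input file
    ElementInclude.include(tree.getroot())            # may raise FatalIncludeError or OSError
    tree.write("expanded.xml", encoding="unicode")    # placeholder output file
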
--------------------------------------------------------------------------------
/xml/etree/ElementPath.py:
--------------------------------------------------------------------------------
1 | #
2 | # ElementTree
3 | # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
4 | #
5 | # limited xpath support for element trees
6 | #
7 | # history:
8 | # 2003-05-23 fl created
9 | # 2003-05-28 fl added support for // etc
10 | # 2003-08-27 fl fixed parsing of periods in element names
11 | # 2007-09-10 fl new selection engine
12 | # 2007-09-12 fl fixed parent selector
13 | # 2007-09-13 fl added iterfind; changed findall to return a list
14 | # 2007-11-30 fl added namespaces support
15 | # 2009-10-30 fl added child element value filter
16 | #
17 | # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
18 | #
19 | # fredrik@pythonware.com
20 | # http://www.pythonware.com
21 | #
22 | # --------------------------------------------------------------------
23 | # The ElementTree toolkit is
24 | #
25 | # Copyright (c) 1999-2009 by Fredrik Lundh
26 | #
27 | # By obtaining, using, and/or copying this software and/or its
28 | # associated documentation, you agree that you have read, understood,
29 | # and will comply with the following terms and conditions:
30 | #
31 | # Permission to use, copy, modify, and distribute this software and
32 | # its associated documentation for any purpose and without fee is
33 | # hereby granted, provided that the above copyright notice appears in
34 | # all copies, and that both that copyright notice and this permission
35 | # notice appear in supporting documentation, and that the name of
36 | # Secret Labs AB or the author not be used in advertising or publicity
37 | # pertaining to distribution of the software without specific, written
38 | # prior permission.
39 | #
40 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
41 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
42 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
43 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
44 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
45 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
46 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
47 | # OF THIS SOFTWARE.
48 | # --------------------------------------------------------------------
49 |
50 | # Licensed to PSF under a Contributor Agreement.
51 | # See http://www.python.org/psf/license for licensing details.
52 |
53 | ##
54 | # Implementation module for XPath support. There's usually no reason
55 | # to import this module directly; the ElementTree does this for
56 | # you, if needed.
57 | ##
58 |
59 | import re
60 |
61 | xpath_tokenizer_re = re.compile(
62 | r"("
63 | r"'[^']*'|\"[^\"]*\"|"
64 | r"::|"
65 | r"//?|"
66 | r"\.\.|"
67 | r"\(\)|"
68 | r"[/.*:\[\]\(\)@=])|"
69 | r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
70 | r"\s+"
71 | )
72 |
73 | def xpath_tokenizer(pattern, namespaces=None):
74 | default_namespace = namespaces.get('') if namespaces else None
75 | parsing_attribute = False
76 | for token in xpath_tokenizer_re.findall(pattern):
77 | ttype, tag = token
78 | if tag and tag[0] != "{":
79 | if ":" in tag:
80 | prefix, uri = tag.split(":", 1)
81 | try:
82 | if not namespaces:
83 | raise KeyError
84 | yield ttype, "{%s}%s" % (namespaces[prefix], uri)
85 | except KeyError:
86 | raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
87 | elif default_namespace and not parsing_attribute:
88 | yield ttype, "{%s}%s" % (default_namespace, tag)
89 | else:
90 | yield token
91 | parsing_attribute = False
92 | else:
93 | yield token
94 | parsing_attribute = ttype == '@'
95 |
96 |
97 | def get_parent_map(context):
98 | parent_map = context.parent_map
99 | if parent_map is None:
100 | context.parent_map = parent_map = {}
101 | for p in context.root.iter():
102 | for e in p:
103 | parent_map[e] = p
104 | return parent_map
105 |
106 |
107 | def _is_wildcard_tag(tag):
108 | return tag[:3] == '{*}' or tag[-2:] == '}*'
109 |
110 |
111 | def _prepare_tag(tag):
112 | _isinstance, _str = isinstance, str
113 | if tag == '{*}*':
114 | # Same as '*', but no comments or processing instructions.
115 | # It can be a surprise that '*' includes those, but there is no
116 | # justification for '{*}*' doing the same.
117 | def select(context, result):
118 | for elem in result:
119 | if _isinstance(elem.tag, _str):
120 | yield elem
121 | elif tag == '{}*':
122 | # Any tag that is not in a namespace.
123 | def select(context, result):
124 | for elem in result:
125 | el_tag = elem.tag
126 | if _isinstance(el_tag, _str) and el_tag[0] != '{':
127 | yield elem
128 | elif tag[:3] == '{*}':
129 | # The tag in any (or no) namespace.
130 | suffix = tag[2:] # '}name'
131 | no_ns = slice(-len(suffix), None)
132 | tag = tag[3:]
133 | def select(context, result):
134 | for elem in result:
135 | el_tag = elem.tag
136 | if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix:
137 | yield elem
138 | elif tag[-2:] == '}*':
139 | # Any tag in the given namespace.
140 | ns = tag[:-1]
141 | ns_only = slice(None, len(ns))
142 | def select(context, result):
143 | for elem in result:
144 | el_tag = elem.tag
145 | if _isinstance(el_tag, _str) and el_tag[ns_only] == ns:
146 | yield elem
147 | else:
148 | raise RuntimeError(f"internal parser error, got {tag}")
149 | return select
150 |
151 |
152 | def prepare_child(next, token):
153 | tag = token[1]
154 | if _is_wildcard_tag(tag):
155 | select_tag = _prepare_tag(tag)
156 | def select(context, result):
157 | def select_child(result):
158 | for elem in result:
159 | yield from elem
160 | return select_tag(context, select_child(result))
161 | else:
162 | if tag[:2] == '{}':
163 | tag = tag[2:] # '{}tag' == 'tag'
164 | def select(context, result):
165 | for elem in result:
166 | for e in elem:
167 | if e.tag == tag:
168 | yield e
169 | return select
170 |
171 | def prepare_star(next, token):
172 | def select(context, result):
173 | for elem in result:
174 | yield from elem
175 | return select
176 |
177 | def prepare_self(next, token):
178 | def select(context, result):
179 | yield from result
180 | return select
181 |
182 | def prepare_descendant(next, token):
183 | try:
184 | token = next()
185 | except StopIteration:
186 | return
187 | if token[0] == "*":
188 | tag = "*"
189 | elif not token[0]:
190 | tag = token[1]
191 | else:
192 | raise SyntaxError("invalid descendant")
193 |
194 | if _is_wildcard_tag(tag):
195 | select_tag = _prepare_tag(tag)
196 | def select(context, result):
197 | def select_child(result):
198 | for elem in result:
199 | for e in elem.iter():
200 | if e is not elem:
201 | yield e
202 | return select_tag(context, select_child(result))
203 | else:
204 | if tag[:2] == '{}':
205 | tag = tag[2:] # '{}tag' == 'tag'
206 | def select(context, result):
207 | for elem in result:
208 | for e in elem.iter(tag):
209 | if e is not elem:
210 | yield e
211 | return select
212 |
213 | def prepare_parent(next, token):
214 | def select(context, result):
215 | # FIXME: raise error if .. is applied at toplevel?
216 | parent_map = get_parent_map(context)
217 | result_map = {}
218 | for elem in result:
219 | if elem in parent_map:
220 | parent = parent_map[elem]
221 | if parent not in result_map:
222 | result_map[parent] = None
223 | yield parent
224 | return select
225 |
226 | def prepare_predicate(next, token):
227 | # FIXME: replace with real parser!!! refs:
228 | # http://effbot.org/zone/simple-iterator-parser.htm
229 | # http://javascript.crockford.com/tdop/tdop.html
230 | signature = []
231 | predicate = []
232 | while 1:
233 | try:
234 | token = next()
235 | except StopIteration:
236 | return
237 | if token[0] == "]":
238 | break
239 | if token == ('', ''):
240 | # ignore whitespace
241 | continue
242 | if token[0] and token[0][:1] in "'\"":
243 | token = "'", token[0][1:-1]
244 | signature.append(token[0] or "-")
245 | predicate.append(token[1])
246 | signature = "".join(signature)
247 | # use signature to determine predicate type
248 | if signature == "@-":
249 | # [@attribute] predicate
250 | key = predicate[1]
251 | def select(context, result):
252 | for elem in result:
253 | if elem.get(key) is not None:
254 | yield elem
255 | return select
256 | if signature == "@-='":
257 | # [@attribute='value']
258 | key = predicate[1]
259 | value = predicate[-1]
260 | def select(context, result):
261 | for elem in result:
262 | if elem.get(key) == value:
263 | yield elem
264 | return select
265 | if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
266 | # [tag]
267 | tag = predicate[0]
268 | def select(context, result):
269 | for elem in result:
270 | if elem.find(tag) is not None:
271 | yield elem
272 | return select
273 | if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
274 | # [.='value'] or [tag='value']
275 | tag = predicate[0]
276 | value = predicate[-1]
277 | if tag:
278 | def select(context, result):
279 | for elem in result:
280 | for e in elem.findall(tag):
281 | if "".join(e.itertext()) == value:
282 | yield elem
283 | break
284 | else:
285 | def select(context, result):
286 | for elem in result:
287 | if "".join(elem.itertext()) == value:
288 | yield elem
289 | return select
290 | if signature == "-" or signature == "-()" or signature == "-()-":
291 | # [index] or [last()] or [last()-index]
292 | if signature == "-":
293 | # [index]
294 | index = int(predicate[0]) - 1
295 | if index < 0:
296 | raise SyntaxError("XPath position >= 1 expected")
297 | else:
298 | if predicate[0] != "last":
299 | raise SyntaxError("unsupported function")
300 | if signature == "-()-":
301 | try:
302 | index = int(predicate[2]) - 1
303 | except ValueError:
304 | raise SyntaxError("unsupported expression")
305 | if index > -2:
306 | raise SyntaxError("XPath offset from last() must be negative")
307 | else:
308 | index = -1
309 | def select(context, result):
310 | parent_map = get_parent_map(context)
311 | for elem in result:
312 | try:
313 | parent = parent_map[elem]
314 | # FIXME: what if the selector is "*" ?
315 | elems = list(parent.findall(elem.tag))
316 | if elems[index] is elem:
317 | yield elem
318 | except (IndexError, KeyError):
319 | pass
320 | return select
321 | raise SyntaxError("invalid predicate")
322 |
323 | ops = {
324 | "": prepare_child,
325 | "*": prepare_star,
326 | ".": prepare_self,
327 | "..": prepare_parent,
328 | "//": prepare_descendant,
329 | "[": prepare_predicate,
330 | }
331 |
332 | _cache = {}
333 |
334 | class _SelectorContext:
335 | parent_map = None
336 | def __init__(self, root):
337 | self.root = root
338 |
339 | # --------------------------------------------------------------------
340 |
341 | ##
342 | # Generate all matching objects.
343 |
344 | def iterfind(elem, path, namespaces=None):
345 | # compile selector pattern
346 | if path[-1:] == "/":
347 | path = path + "*" # implicit all (FIXME: keep this?)
348 |
349 | cache_key = (path,)
350 | if namespaces:
351 | cache_key += tuple(sorted(namespaces.items()))
352 |
353 | try:
354 | selector = _cache[cache_key]
355 | except KeyError:
356 | if len(_cache) > 100:
357 | _cache.clear()
358 | if path[:1] == "/":
359 | raise SyntaxError("cannot use absolute path on element")
360 | next = iter(xpath_tokenizer(path, namespaces)).__next__
361 | try:
362 | token = next()
363 | except StopIteration:
364 | return
365 | selector = []
366 | while 1:
367 | try:
368 | selector.append(ops[token[0]](next, token))
369 | except StopIteration:
370 | raise SyntaxError("invalid path") from None
371 | try:
372 | token = next()
373 | if token[0] == "/":
374 | token = next()
375 | except StopIteration:
376 | break
377 | _cache[cache_key] = selector
378 | # execute selector pattern
379 | result = [elem]
380 | context = _SelectorContext(elem)
381 | for select in selector:
382 | result = select(context, result)
383 | return result
384 |
385 | ##
386 | # Find first matching object.
387 |
388 | def find(elem, path, namespaces=None):
389 | return next(iterfind(elem, path, namespaces), None)
390 |
391 | ##
392 | # Find all matching objects.
393 |
394 | def findall(elem, path, namespaces=None):
395 | return list(iterfind(elem, path, namespaces))
396 |
397 | ##
398 | # Find text for first matching object.
399 |
400 | def findtext(elem, path, default=None, namespaces=None):
401 | try:
402 | elem = next(iterfind(elem, path, namespaces))
403 | return elem.text or ""
404 | except StopIteration:
405 | return default
406 |
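
A short usage sketch, not part of the file above: the selectors and predicates implemented here are normally reached through Element.find()/findall()/iterfind(), which delegate to this module.

    # Sketch: the limited XPath subset handled by ElementPath.
    from xml.etree import ElementTree

    root = ElementTree.fromstring(
        "<lexicon>"
        "<entry id='a'><form lang='id'>adat</form></entry>"
        "<entry id='b'><form lang='id'>loka</form></entry>"
        "</lexicon>")

    print([e.get("id") for e in root.findall("entry")])   # child selector      -> ['a', 'b']
    print(root.find("entry[@id='b']/form").text)          # [@attr='value']     -> loka
    print(root.findall("entry[2]")[0].get("id"))          # positional [index]  -> b
    print(root.find(".//form[.='adat']") is not None)     # [.='value'] on text -> True
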
--------------------------------------------------------------------------------
/xml/etree/__init__.py:
--------------------------------------------------------------------------------
1 | # $Id: __init__.py 3375 2008-02-13 08:05:08Z fredrik $
2 | # elementtree package
3 |
4 | # --------------------------------------------------------------------
5 | # The ElementTree toolkit is
6 | #
7 | # Copyright (c) 1999-2008 by Fredrik Lundh
8 | #
9 | # By obtaining, using, and/or copying this software and/or its
10 | # associated documentation, you agree that you have read, understood,
11 | # and will comply with the following terms and conditions:
12 | #
13 | # Permission to use, copy, modify, and distribute this software and
14 | # its associated documentation for any purpose and without fee is
15 | # hereby granted, provided that the above copyright notice appears in
16 | # all copies, and that both that copyright notice and this permission
17 | # notice appear in supporting documentation, and that the name of
18 | # Secret Labs AB or the author not be used in advertising or publicity
19 | # pertaining to distribution of the software without specific, written
20 | # prior permission.
21 | #
22 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
23 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
24 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
25 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
26 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
28 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
29 | # OF THIS SOFTWARE.
30 | # --------------------------------------------------------------------
31 |
32 | # Licensed to PSF under a Contributor Agreement.
33 | # See http://www.python.org/psf/license for licensing details.
34 |
--------------------------------------------------------------------------------
/xml/etree/cElementTree.py:
--------------------------------------------------------------------------------
1 | # Deprecated alias for xml.etree.ElementTree
2 |
3 | from xml.etree.ElementTree import *
4 |
--------------------------------------------------------------------------------
/xml/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | """Python interfaces to XML parsers.
2 |
3 | This package contains one module:
4 |
5 | expat -- Python wrapper for James Clark's Expat parser, with namespace
6 | support.
7 |
8 | """
9 |
--------------------------------------------------------------------------------
/xml/parsers/expat.py:
--------------------------------------------------------------------------------
1 | """Interface to the Expat non-validating XML parser."""
2 | import sys
3 |
4 | from pyexpat import *
5 |
6 | # provide pyexpat submodules as xml.parsers.expat submodules
7 | sys.modules['xml.parsers.expat.model'] = model
8 | sys.modules['xml.parsers.expat.errors'] = errors
9 |
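
A short usage sketch, not part of the file above: the names re-exported here come from pyexpat, so the usual callback-based API applies.

    # Sketch: low-level event callbacks with the re-exported Expat parser.
    from xml.parsers.expat import ParserCreate

    def start(name, attrs):
        print("start", name, attrs)

    def end(name):
        print("end", name)

    parser = ParserCreate()
    parser.StartElementHandler = start
    parser.EndElementHandler = end
    parser.Parse(b"<entry id='a'><form>adat</form></entry>", True)  # True = final chunk
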
--------------------------------------------------------------------------------
/xml/sax/__init__.py:
--------------------------------------------------------------------------------
1 | """Simple API for XML (SAX) implementation for Python.
2 |
3 | This module provides an implementation of the SAX 2 interface;
4 | information about the Java version of the interface can be found at
5 | http://www.megginson.com/SAX/. The Python version of the interface is
6 | documented at <...>.
7 |
8 | This package contains the following modules:
9 |
10 | handler -- Base classes and constants which define the SAX 2 API for
11 | the 'client-side' of SAX for Python.
12 |
13 | saxutils -- Implementation of the convenience classes commonly used to
14 | work with SAX.
15 |
16 | xmlreader -- Base classes and constants which define the SAX 2 API for
17 | the parsers used with SAX for Python.
18 |
19 | expatreader -- Driver that allows use of the Expat parser with SAX.
20 | """
21 |
22 | from .xmlreader import InputSource
23 | from .handler import ContentHandler, ErrorHandler
24 | from ._exceptions import SAXException, SAXNotRecognizedException, \
25 | SAXParseException, SAXNotSupportedException, \
26 | SAXReaderNotAvailable
27 |
28 |
29 | def parse(source, handler, errorHandler=ErrorHandler()):
30 | parser = make_parser()
31 | parser.setContentHandler(handler)
32 | parser.setErrorHandler(errorHandler)
33 | parser.parse(source)
34 |
35 | def parseString(string, handler, errorHandler=ErrorHandler()):
36 | import io
37 | if errorHandler is None:
38 | errorHandler = ErrorHandler()
39 | parser = make_parser()
40 | parser.setContentHandler(handler)
41 | parser.setErrorHandler(errorHandler)
42 |
43 | inpsrc = InputSource()
44 | if isinstance(string, str):
45 | inpsrc.setCharacterStream(io.StringIO(string))
46 | else:
47 | inpsrc.setByteStream(io.BytesIO(string))
48 | parser.parse(inpsrc)
49 |
50 | # this is the parser list used by the make_parser function if no
51 | # alternatives are given as parameters to the function
52 |
53 | default_parser_list = ["xml.sax.expatreader"]
54 |
55 | # tell modulefinder that importing sax potentially imports expatreader
56 | _false = 0
57 | if _false:
58 | import xml.sax.expatreader
59 |
60 | import os, sys
61 | if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ:
62 | default_parser_list = os.environ["PY_SAX_PARSER"].split(",")
63 | del os
64 |
65 | _key = "python.xml.sax.parser"
66 | if sys.platform[:4] == "java" and sys.registry.containsKey(_key):
67 | default_parser_list = sys.registry.getProperty(_key).split(",")
68 |
69 |
70 | def make_parser(parser_list=()):
71 | """Creates and returns a SAX parser.
72 |
73 | Creates the first parser it is able to instantiate of the ones
74 | given in the iterable created by chaining parser_list and
75 | default_parser_list. The iterables must contain the names of Python
76 | modules containing both a SAX parser and a create_parser function."""
77 |
78 | for parser_name in list(parser_list) + default_parser_list:
79 | try:
80 | return _create_parser(parser_name)
81 | except ImportError as e:
82 | import sys
83 | if parser_name in sys.modules:
84 | # The parser module was found, but importing it
85 | # failed unexpectedly, pass this exception through
86 | raise
87 | except SAXReaderNotAvailable:
88 | # The parser module detected that it won't work properly,
89 | # so try the next one
90 | pass
91 |
92 | raise SAXReaderNotAvailable("No parsers found", None)
93 |
94 | # --- Internal utility methods used by make_parser
95 |
96 | if sys.platform[ : 4] == "java":
97 | def _create_parser(parser_name):
98 | from org.python.core import imp
99 | drv_module = imp.importName(parser_name, 0, globals())
100 | return drv_module.create_parser()
101 |
102 | else:
103 | def _create_parser(parser_name):
104 | drv_module = __import__(parser_name,{},{},['create_parser'])
105 | return drv_module.create_parser()
106 |
107 | del sys
108 |
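
A short usage sketch, not part of the file above: a minimal ContentHandler driven through parseString() as defined in this module.

    # Sketch: count <entry> elements with a SAX ContentHandler.
    import xml.sax

    class EntryCounter(xml.sax.ContentHandler):
        def __init__(self):
            super().__init__()
            self.count = 0
        def startElement(self, name, attrs):
            if name == "entry":
                self.count += 1

    handler = EntryCounter()
    xml.sax.parseString(b"<lexicon><entry/><entry/></lexicon>", handler)
    print(handler.count)   # 2
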
--------------------------------------------------------------------------------
/xml/sax/_exceptions.py:
--------------------------------------------------------------------------------
1 | """Different kinds of SAX Exceptions"""
2 | import sys
3 | if sys.platform[:4] == "java":
4 | from java.lang import Exception
5 | del sys
6 |
7 | # ===== SAXEXCEPTION =====
8 |
9 | class SAXException(Exception):
10 | """Encapsulate an XML error or warning. This class can contain
11 | basic error or warning information from either the XML parser or
12 | the application: you can subclass it to provide additional
13 | functionality, or to add localization. Note that although you will
14 | receive a SAXException as the argument to the handlers in the
15 | ErrorHandler interface, you are not actually required to raise
16 | the exception; instead, you can simply read the information in
17 | it."""
18 |
19 | def __init__(self, msg, exception=None):
20 | """Creates an exception. The message is required, but the exception
21 | is optional."""
22 | self._msg = msg
23 | self._exception = exception
24 | Exception.__init__(self, msg)
25 |
26 | def getMessage(self):
27 | "Return a message for this exception."
28 | return self._msg
29 |
30 | def getException(self):
31 | "Return the embedded exception, or None if there was none."
32 | return self._exception
33 |
34 | def __str__(self):
35 | "Create a string representation of the exception."
36 | return self._msg
37 |
38 | def __getitem__(self, ix):
39 | """Avoids weird error messages if someone does exception[ix] by
40 | mistake, since Exception has __getitem__ defined."""
41 | raise AttributeError("__getitem__")
42 |
43 |
44 | # ===== SAXPARSEEXCEPTION =====
45 |
46 | class SAXParseException(SAXException):
47 | """Encapsulate an XML parse error or warning.
48 |
49 | This exception will include information for locating the error in
50 | the original XML document. Note that although the application will
51 | receive a SAXParseException as the argument to the handlers in the
52 | ErrorHandler interface, the application is not actually required
53 | to raise the exception; instead, it can simply read the
54 | information in it and take a different action.
55 |
56 | Since this exception is a subclass of SAXException, it inherits
57 | the ability to wrap another exception."""
58 |
59 | def __init__(self, msg, exception, locator):
60 | "Creates the exception. The exception parameter is allowed to be None."
61 | SAXException.__init__(self, msg, exception)
62 | self._locator = locator
63 |
64 | # We need to cache this stuff at construction time.
65 | # If this exception is raised, the objects through which we must
66 | # traverse to get this information may be deleted by the time
67 | # it gets caught.
68 | self._systemId = self._locator.getSystemId()
69 | self._colnum = self._locator.getColumnNumber()
70 | self._linenum = self._locator.getLineNumber()
71 |
72 | def getColumnNumber(self):
73 | """The column number of the end of the text where the exception
74 | occurred."""
75 | return self._colnum
76 |
77 | def getLineNumber(self):
78 | "The line number of the end of the text where the exception occurred."
79 | return self._linenum
80 |
81 | def getPublicId(self):
82 | "Get the public identifier of the entity where the exception occurred."
83 | return self._locator.getPublicId()
84 |
85 | def getSystemId(self):
86 | "Get the system identifier of the entity where the exception occurred."
87 | return self._systemId
88 |
89 | def __str__(self):
90 | "Create a string representation of the exception."
91 | sysid = self.getSystemId()
92 | if sysid is None:
93 | sysid = ""
94 | linenum = self.getLineNumber()
95 | if linenum is None:
96 | linenum = "?"
97 | colnum = self.getColumnNumber()
98 | if colnum is None:
99 | colnum = "?"
100 | return "%s:%s:%s: %s" % (sysid, linenum, colnum, self._msg)
101 |
102 |
103 | # ===== SAXNOTRECOGNIZEDEXCEPTION =====
104 |
105 | class SAXNotRecognizedException(SAXException):
106 | """Exception class for an unrecognized identifier.
107 |
108 | An XMLReader will raise this exception when it is confronted with an
109 | unrecognized feature or property. SAX applications and extensions may
110 | use this class for similar purposes."""
111 |
112 |
113 | # ===== SAXNOTSUPPORTEDEXCEPTION =====
114 |
115 | class SAXNotSupportedException(SAXException):
116 | """Exception class for an unsupported operation.
117 |
118 | An XMLReader will raise this exception when a service it cannot
119 | perform is requested (specifically setting a state or value). SAX
120 | applications and extensions may use this class for similar
121 | purposes."""
122 |
123 | # ===== SAXREADERNOTAVAILABLE =====
124 |
125 | class SAXReaderNotAvailable(SAXNotSupportedException):
126 | """Exception class for a missing driver.
127 |
128 | An XMLReader module (driver) should raise this exception when it
129 | is first imported, e.g. when a support module cannot be imported.
130 | It also may be raised during parsing, e.g. if executing an external
131 | program is not permitted."""
132 |
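
A short usage sketch, not part of the file above: SAXParseException caches the error location at construction time, so it can be reported after parsing fails.

    # Sketch: read the location information carried by SAXParseException.
    import xml.sax

    try:
        xml.sax.parseString(b"<entry><form>unclosed</entry>", xml.sax.ContentHandler())
    except xml.sax.SAXParseException as exc:
        print(exc.getLineNumber(), exc.getColumnNumber(), exc.getMessage())
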
--------------------------------------------------------------------------------
/xml/sax/saxutils.py:
--------------------------------------------------------------------------------
1 | """\
2 | A library of useful helper classes to the SAX classes, for the
3 | convenience of application and driver writers.
4 | """
5 |
6 | import os, urllib.parse, urllib.request
7 | import io
8 | import codecs
9 | from . import handler
10 | from . import xmlreader
11 |
12 | def __dict_replace(s, d):
13 | """Replace substrings of a string using a dictionary."""
14 | for key, value in d.items():
15 | s = s.replace(key, value)
16 | return s
17 |
18 | def escape(data, entities={}):
19 | """Escape &, <, and > in a string of data.
20 |
21 | You can escape other strings of data by passing a dictionary as
22 | the optional entities parameter. The keys and values must all be
23 | strings; each key will be replaced with its corresponding value.
24 | """
25 |
26 | # must do ampersand first
27 | data = data.replace("&", "&")
28 | data = data.replace(">", ">")
29 | data = data.replace("<", "<")
30 | if entities:
31 | data = __dict_replace(data, entities)
32 | return data
33 |
34 | def unescape(data, entities={}):
35 | """Unescape &, <, and > in a string of data.
36 |
37 | You can unescape other strings of data by passing a dictionary as
38 | the optional entities parameter. The keys and values must all be
39 | strings; each key will be replaced with its corresponding value.
40 | """
41 | data = data.replace("<", "<")
42 | data = data.replace(">", ">")
43 | if entities:
44 | data = __dict_replace(data, entities)
45 | # must do ampersand last
46 |     return data.replace("&amp;", "&")
47 |
48 | def quoteattr(data, entities={}):
49 | """Escape and quote an attribute value.
50 |
51 | Escape &, <, and > in a string of data, then quote it for use as
52 | an attribute value. The \" character will be escaped as well, if
53 | necessary.
54 |
55 | You can escape other strings of data by passing a dictionary as
56 | the optional entities parameter. The keys and values must all be
57 | strings; each key will be replaced with its corresponding value.
58 | """
59 |     entities = {**entities, '\n': '&#10;', '\r': '&#13;', '\t':'&#9;'}
60 | data = escape(data, entities)
61 | if '"' in data:
62 | if "'" in data:
63 | data = '"%s"' % data.replace('"', """)
64 | else:
65 | data = "'%s'" % data
66 | else:
67 | data = '"%s"' % data
68 | return data
69 |
70 |
71 | def _gettextwriter(out, encoding):
72 | if out is None:
73 | import sys
74 | return sys.stdout
75 |
76 | if isinstance(out, io.TextIOBase):
77 | # use a text writer as is
78 | return out
79 |
80 | if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)):
81 | # use a codecs stream writer as is
82 | return out
83 |
84 | # wrap a binary writer with TextIOWrapper
85 | if isinstance(out, io.RawIOBase):
86 | # Keep the original file open when the TextIOWrapper is
87 | # destroyed
88 | class _wrapper:
89 | __class__ = out.__class__
90 | def __getattr__(self, name):
91 | return getattr(out, name)
92 | buffer = _wrapper()
93 | buffer.close = lambda: None
94 | else:
95 | # This is to handle passed objects that aren't in the
96 | # IOBase hierarchy, but just have a write method
97 | buffer = io.BufferedIOBase()
98 | buffer.writable = lambda: True
99 | buffer.write = out.write
100 | try:
101 | # TextIOWrapper uses this methods to determine
102 | # if BOM (for UTF-16, etc) should be added
103 | buffer.seekable = out.seekable
104 | buffer.tell = out.tell
105 | except AttributeError:
106 | pass
107 | return io.TextIOWrapper(buffer, encoding=encoding,
108 | errors='xmlcharrefreplace',
109 | newline='\n',
110 | write_through=True)
111 |
112 | class XMLGenerator(handler.ContentHandler):
113 |
114 | def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
115 | handler.ContentHandler.__init__(self)
116 | out = _gettextwriter(out, encoding)
117 | self._write = out.write
118 | self._flush = out.flush
119 | self._ns_contexts = [{}] # contains uri -> prefix dicts
120 | self._current_context = self._ns_contexts[-1]
121 | self._undeclared_ns_maps = []
122 | self._encoding = encoding
123 | self._short_empty_elements = short_empty_elements
124 | self._pending_start_element = False
125 |
126 | def _qname(self, name):
127 | """Builds a qualified name from a (ns_url, localname) pair"""
128 | if name[0]:
129 | # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
130 | # bound by definition to http://www.w3.org/XML/1998/namespace. It
131 | # does not need to be declared and will not usually be found in
132 | # self._current_context.
133 | if 'http://www.w3.org/XML/1998/namespace' == name[0]:
134 | return 'xml:' + name[1]
135 | # The name is in a non-empty namespace
136 | prefix = self._current_context[name[0]]
137 | if prefix:
138 | # If it is not the default namespace, prepend the prefix
139 | return prefix + ":" + name[1]
140 | # Return the unqualified name
141 | return name[1]
142 |
143 | def _finish_pending_start_element(self,endElement=False):
144 | if self._pending_start_element:
145 | self._write('>')
146 | self._pending_start_element = False
147 |
148 | # ContentHandler methods
149 |
150 | def startDocument(self):
151 |         self._write('<?xml version="1.0" encoding="%s"?>\n' %
152 | self._encoding)
153 |
154 | def endDocument(self):
155 | self._flush()
156 |
157 | def startPrefixMapping(self, prefix, uri):
158 | self._ns_contexts.append(self._current_context.copy())
159 | self._current_context[uri] = prefix
160 | self._undeclared_ns_maps.append((prefix, uri))
161 |
162 | def endPrefixMapping(self, prefix):
163 | self._current_context = self._ns_contexts[-1]
164 | del self._ns_contexts[-1]
165 |
166 | def startElement(self, name, attrs):
167 | self._finish_pending_start_element()
168 | self._write('<' + name)
169 | for (name, value) in attrs.items():
170 | self._write(' %s=%s' % (name, quoteattr(value)))
171 | if self._short_empty_elements:
172 | self._pending_start_element = True
173 | else:
174 | self._write(">")
175 |
176 | def endElement(self, name):
177 | if self._pending_start_element:
178 | self._write('/>')
179 | self._pending_start_element = False
180 | else:
181 |             self._write('</%s>' % name)
182 |
183 | def startElementNS(self, name, qname, attrs):
184 | self._finish_pending_start_element()
185 | self._write('<' + self._qname(name))
186 |
187 | for prefix, uri in self._undeclared_ns_maps:
188 | if prefix:
189 | self._write(' xmlns:%s="%s"' % (prefix, uri))
190 | else:
191 | self._write(' xmlns="%s"' % uri)
192 | self._undeclared_ns_maps = []
193 |
194 | for (name, value) in attrs.items():
195 | self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
196 | if self._short_empty_elements:
197 | self._pending_start_element = True
198 | else:
199 | self._write(">")
200 |
201 | def endElementNS(self, name, qname):
202 | if self._pending_start_element:
203 | self._write('/>')
204 | self._pending_start_element = False
205 | else:
206 |             self._write('</%s>' % self._qname(name))
207 |
208 | def characters(self, content):
209 | if content:
210 | self._finish_pending_start_element()
211 | if not isinstance(content, str):
212 | content = str(content, self._encoding)
213 | self._write(escape(content))
214 |
215 | def ignorableWhitespace(self, content):
216 | if content:
217 | self._finish_pending_start_element()
218 | if not isinstance(content, str):
219 | content = str(content, self._encoding)
220 | self._write(content)
221 |
222 | def processingInstruction(self, target, data):
223 | self._finish_pending_start_element()
224 |         self._write('<?%s %s?>' % (target, data))
225 |
226 |
227 | class XMLFilterBase(xmlreader.XMLReader):
228 | """This class is designed to sit between an XMLReader and the
229 | client application's event handlers. By default, it does nothing
230 | but pass requests up to the reader and events on to the handlers
231 | unmodified, but subclasses can override specific methods to modify
232 | the event stream or the configuration requests as they pass
233 | through."""
234 |
235 | def __init__(self, parent = None):
236 | xmlreader.XMLReader.__init__(self)
237 | self._parent = parent
238 |
239 | # ErrorHandler methods
240 |
241 | def error(self, exception):
242 | self._err_handler.error(exception)
243 |
244 | def fatalError(self, exception):
245 | self._err_handler.fatalError(exception)
246 |
247 | def warning(self, exception):
248 | self._err_handler.warning(exception)
249 |
250 | # ContentHandler methods
251 |
252 | def setDocumentLocator(self, locator):
253 | self._cont_handler.setDocumentLocator(locator)
254 |
255 | def startDocument(self):
256 | self._cont_handler.startDocument()
257 |
258 | def endDocument(self):
259 | self._cont_handler.endDocument()
260 |
261 | def startPrefixMapping(self, prefix, uri):
262 | self._cont_handler.startPrefixMapping(prefix, uri)
263 |
264 | def endPrefixMapping(self, prefix):
265 | self._cont_handler.endPrefixMapping(prefix)
266 |
267 | def startElement(self, name, attrs):
268 | self._cont_handler.startElement(name, attrs)
269 |
270 | def endElement(self, name):
271 | self._cont_handler.endElement(name)
272 |
273 | def startElementNS(self, name, qname, attrs):
274 | self._cont_handler.startElementNS(name, qname, attrs)
275 |
276 | def endElementNS(self, name, qname):
277 | self._cont_handler.endElementNS(name, qname)
278 |
279 | def characters(self, content):
280 | self._cont_handler.characters(content)
281 |
282 | def ignorableWhitespace(self, chars):
283 | self._cont_handler.ignorableWhitespace(chars)
284 |
285 | def processingInstruction(self, target, data):
286 | self._cont_handler.processingInstruction(target, data)
287 |
288 | def skippedEntity(self, name):
289 | self._cont_handler.skippedEntity(name)
290 |
291 | # DTDHandler methods
292 |
293 | def notationDecl(self, name, publicId, systemId):
294 | self._dtd_handler.notationDecl(name, publicId, systemId)
295 |
296 | def unparsedEntityDecl(self, name, publicId, systemId, ndata):
297 | self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)
298 |
299 | # EntityResolver methods
300 |
301 | def resolveEntity(self, publicId, systemId):
302 | return self._ent_handler.resolveEntity(publicId, systemId)
303 |
304 | # XMLReader methods
305 |
306 | def parse(self, source):
307 | self._parent.setContentHandler(self)
308 | self._parent.setErrorHandler(self)
309 | self._parent.setEntityResolver(self)
310 | self._parent.setDTDHandler(self)
311 | self._parent.parse(source)
312 |
313 | def setLocale(self, locale):
314 | self._parent.setLocale(locale)
315 |
316 | def getFeature(self, name):
317 | return self._parent.getFeature(name)
318 |
319 | def setFeature(self, name, state):
320 | self._parent.setFeature(name, state)
321 |
322 | def getProperty(self, name):
323 | return self._parent.getProperty(name)
324 |
325 | def setProperty(self, name, value):
326 | self._parent.setProperty(name, value)
327 |
328 | # XMLFilter methods
329 |
330 | def getParent(self):
331 | return self._parent
332 |
333 | def setParent(self, parent):
334 | self._parent = parent
335 |
336 | # --- Utility functions
337 |
338 | def prepare_input_source(source, base=""):
339 | """This function takes an InputSource and an optional base URL and
340 | returns a fully resolved InputSource object ready for reading."""
341 |
342 | if isinstance(source, os.PathLike):
343 | source = os.fspath(source)
344 | if isinstance(source, str):
345 | source = xmlreader.InputSource(source)
346 | elif hasattr(source, "read"):
347 | f = source
348 | source = xmlreader.InputSource()
349 | if isinstance(f.read(0), str):
350 | source.setCharacterStream(f)
351 | else:
352 | source.setByteStream(f)
353 | if hasattr(f, "name") and isinstance(f.name, str):
354 | source.setSystemId(f.name)
355 |
356 | if source.getCharacterStream() is None and source.getByteStream() is None:
357 | sysid = source.getSystemId()
358 | basehead = os.path.dirname(os.path.normpath(base))
359 | sysidfilename = os.path.join(basehead, sysid)
360 | if os.path.isfile(sysidfilename):
361 | source.setSystemId(sysidfilename)
362 | f = open(sysidfilename, "rb")
363 | else:
364 | source.setSystemId(urllib.parse.urljoin(base, sysid))
365 | f = urllib.request.urlopen(source.getSystemId())
366 |
367 | source.setByteStream(f)
368 |
369 | return source
370 |
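
A short usage sketch, not part of the file above: the escaping helpers and XMLGenerator defined in this module.

    # Sketch: escape/quoteattr plus streaming output through XMLGenerator.
    import io
    from xml.sax.saxutils import XMLGenerator, escape, quoteattr

    print(escape("a < b & c"))        # a &lt; b &amp; c
    print(quoteattr('say "hi"'))      # 'say "hi"'  (single-quoted, nothing to escape)

    out = io.StringIO()
    gen = XMLGenerator(out, encoding="utf-8", short_empty_elements=True)
    gen.startDocument()
    gen.startElement("entry", {"id": "a"})
    gen.characters("adat & loka")
    gen.endElement("entry")
    gen.endDocument()
    print(out.getvalue())
    # <?xml version="1.0" encoding="utf-8"?>
    # <entry id="a">adat &amp; loka</entry>
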
--------------------------------------------------------------------------------
/xml/sax/xmlreader.py:
--------------------------------------------------------------------------------
1 | """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
2 | should be based on this code. """
3 |
4 | from . import handler
5 |
6 | from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
7 |
8 |
9 | # ===== XMLREADER =====
10 |
11 | class XMLReader:
12 | """Interface for reading an XML document using callbacks.
13 |
14 | XMLReader is the interface that an XML parser's SAX2 driver must
15 | implement. This interface allows an application to set and query
16 | features and properties in the parser, to register event handlers
17 | for document processing, and to initiate a document parse.
18 |
19 | All SAX interfaces are assumed to be synchronous: the parse
20 | methods must not return until parsing is complete, and readers
21 | must wait for an event-handler callback to return before reporting
22 | the next event."""
23 |
24 | def __init__(self):
25 | self._cont_handler = handler.ContentHandler()
26 | self._dtd_handler = handler.DTDHandler()
27 | self._ent_handler = handler.EntityResolver()
28 | self._err_handler = handler.ErrorHandler()
29 |
30 | def parse(self, source):
31 | "Parse an XML document from a system identifier or an InputSource."
32 | raise NotImplementedError("This method must be implemented!")
33 |
34 | def getContentHandler(self):
35 | "Returns the current ContentHandler."
36 | return self._cont_handler
37 |
38 | def setContentHandler(self, handler):
39 | "Registers a new object to receive document content events."
40 | self._cont_handler = handler
41 |
42 | def getDTDHandler(self):
43 | "Returns the current DTD handler."
44 | return self._dtd_handler
45 |
46 | def setDTDHandler(self, handler):
47 | "Register an object to receive basic DTD-related events."
48 | self._dtd_handler = handler
49 |
50 | def getEntityResolver(self):
51 | "Returns the current EntityResolver."
52 | return self._ent_handler
53 |
54 | def setEntityResolver(self, resolver):
55 | "Register an object to resolve external entities."
56 | self._ent_handler = resolver
57 |
58 | def getErrorHandler(self):
59 | "Returns the current ErrorHandler."
60 | return self._err_handler
61 |
62 | def setErrorHandler(self, handler):
63 | "Register an object to receive error-message events."
64 | self._err_handler = handler
65 |
66 | def setLocale(self, locale):
67 | """Allow an application to set the locale for errors and warnings.
68 |
69 | SAX parsers are not required to provide localization for errors
70 | and warnings; if they cannot support the requested locale,
71 | however, they must raise a SAX exception. Applications may
72 | request a locale change in the middle of a parse."""
73 | raise SAXNotSupportedException("Locale support not implemented")
74 |
75 | def getFeature(self, name):
76 | "Looks up and returns the state of a SAX2 feature."
77 | raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
78 |
79 | def setFeature(self, name, state):
80 | "Sets the state of a SAX2 feature."
81 | raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
82 |
83 | def getProperty(self, name):
84 | "Looks up and returns the value of a SAX2 property."
85 | raise SAXNotRecognizedException("Property '%s' not recognized" % name)
86 |
87 | def setProperty(self, name, value):
88 | "Sets the value of a SAX2 property."
89 | raise SAXNotRecognizedException("Property '%s' not recognized" % name)
90 |
91 | class IncrementalParser(XMLReader):
92 | """This interface adds three extra methods to the XMLReader
93 | interface that allow XML parsers to support incremental
94 | parsing. Support for this interface is optional, since not all
95 | underlying XML parsers support this functionality.
96 |
97 | When the parser is instantiated it is ready to begin accepting
98 | data from the feed method immediately. After parsing has been
99 | finished with a call to close the reset method must be called to
100 | make the parser ready to accept new data, either from feed or
101 | using the parse method.
102 |
103 | Note that these methods must _not_ be called during parsing, that
104 | is, after parse has been called and before it returns.
105 |
106 | By default, the class also implements the parse method of the XMLReader
107 | interface using the feed, close and reset methods of the
108 | IncrementalParser interface as a convenience to SAX 2.0 driver
109 | writers."""
110 |
111 | def __init__(self, bufsize=2**16):
112 | self._bufsize = bufsize
113 | XMLReader.__init__(self)
114 |
115 | def parse(self, source):
116 | from . import saxutils
117 | source = saxutils.prepare_input_source(source)
118 |
119 | self.prepareParser(source)
120 | file = source.getCharacterStream()
121 | if file is None:
122 | file = source.getByteStream()
123 | buffer = file.read(self._bufsize)
124 | while buffer:
125 | self.feed(buffer)
126 | buffer = file.read(self._bufsize)
127 | self.close()
128 |
129 | def feed(self, data):
130 | """This method gives the raw XML data in the data parameter to
131 | the parser and makes it parse the data, emitting the
132 | corresponding events. It is allowed for XML constructs to be
133 | split across several calls to feed.
134 |
135 | feed may raise SAXException."""
136 | raise NotImplementedError("This method must be implemented!")
137 |
138 | def prepareParser(self, source):
139 | """This method is called by the parse implementation to allow
140 | the SAX 2.0 driver to prepare itself for parsing."""
141 | raise NotImplementedError("prepareParser must be overridden!")
142 |
143 | def close(self):
144 | """This method is called when the entire XML document has been
145 | passed to the parser through the feed method, to notify the
146 | parser that there are no more data. This allows the parser to
147 | do the final checks on the document and empty the internal
148 | data buffer.
149 |
150 | The parser will not be ready to parse another document until
151 | the reset method has been called.
152 |
153 | close may raise SAXException."""
154 | raise NotImplementedError("This method must be implemented!")
155 |
156 | def reset(self):
157 | """This method is called after close has been called to reset
158 | the parser so that it is ready to parse new documents. The
159 | results of calling parse or feed after close without calling
160 | reset are undefined."""
161 | raise NotImplementedError("This method must be implemented!")
162 |
163 | # ===== LOCATOR =====
164 |
165 | class Locator:
166 | """Interface for associating a SAX event with a document
167 | location. A locator object will return valid results only during
168 | calls to DocumentHandler methods; at any other time, the
169 | results are unpredictable."""
170 |
171 | def getColumnNumber(self):
172 | "Return the column number where the current event ends."
173 | return -1
174 |
175 | def getLineNumber(self):
176 | "Return the line number where the current event ends."
177 | return -1
178 |
179 | def getPublicId(self):
180 | "Return the public identifier for the current event."
181 | return None
182 |
183 | def getSystemId(self):
184 | "Return the system identifier for the current event."
185 | return None
186 |
187 | # ===== INPUTSOURCE =====
188 |
189 | class InputSource:
190 | """Encapsulation of the information needed by the XMLReader to
191 | read entities.
192 |
193 | This class may include information about the public identifier,
194 | system identifier, byte stream (possibly with character encoding
195 | information) and/or the character stream of an entity.
196 |
197 | Applications will create objects of this class for use in the
198 | XMLReader.parse method and for returning from
199 | EntityResolver.resolveEntity.
200 |
201 | An InputSource belongs to the application, the XMLReader is not
202 | allowed to modify InputSource objects passed to it from the
203 | application, although it may make copies and modify those."""
204 |
205 | def __init__(self, system_id = None):
206 | self.__system_id = system_id
207 | self.__public_id = None
208 | self.__encoding = None
209 | self.__bytefile = None
210 | self.__charfile = None
211 |
212 | def setPublicId(self, public_id):
213 | "Sets the public identifier of this InputSource."
214 | self.__public_id = public_id
215 |
216 | def getPublicId(self):
217 | "Returns the public identifier of this InputSource."
218 | return self.__public_id
219 |
220 | def setSystemId(self, system_id):
221 | "Sets the system identifier of this InputSource."
222 | self.__system_id = system_id
223 |
224 | def getSystemId(self):
225 | "Returns the system identifier of this InputSource."
226 | return self.__system_id
227 |
228 | def setEncoding(self, encoding):
229 | """Sets the character encoding of this InputSource.
230 |
231 | The encoding must be a string acceptable for an XML encoding
232 | declaration (see section 4.3.3 of the XML recommendation).
233 |
234 | The encoding attribute of the InputSource is ignored if the
235 | InputSource also contains a character stream."""
236 | self.__encoding = encoding
237 |
238 | def getEncoding(self):
239 | "Get the character encoding of this InputSource."
240 | return self.__encoding
241 |
242 | def setByteStream(self, bytefile):
243 | """Set the byte stream (a Python file-like object which does
244 | not perform byte-to-character conversion) for this input
245 | source.
246 |
247 | The SAX parser will ignore this if there is also a character
248 | stream specified, but it will use a byte stream in preference
249 | to opening a URI connection itself.
250 |
251 | If the application knows the character encoding of the byte
252 | stream, it should set it with the setEncoding method."""
253 | self.__bytefile = bytefile
254 |
255 | def getByteStream(self):
256 | """Get the byte stream for this input source.
257 |
258 | The getEncoding method will return the character encoding for
259 | this byte stream, or None if unknown."""
260 | return self.__bytefile
261 |
262 | def setCharacterStream(self, charfile):
263 | """Set the character stream for this input source. (The stream
264 | must be a Python 2.0 Unicode-wrapped file-like that performs
265 | conversion to Unicode strings.)
266 |
267 | If there is a character stream specified, the SAX parser will
268 | ignore any byte stream and will not attempt to open a URI
269 | connection to the system identifier."""
270 | self.__charfile = charfile
271 |
272 | def getCharacterStream(self):
273 | "Get the character stream for this input source."
274 | return self.__charfile
275 |
276 | # ===== ATTRIBUTESIMPL =====
277 |
278 | class AttributesImpl:
279 |
280 | def __init__(self, attrs):
281 | """Non-NS-aware implementation.
282 |
283 | attrs should be of the form {name : value}."""
284 | self._attrs = attrs
285 |
286 | def getLength(self):
287 | return len(self._attrs)
288 |
289 | def getType(self, name):
290 | return "CDATA"
291 |
292 | def getValue(self, name):
293 | return self._attrs[name]
294 |
295 | def getValueByQName(self, name):
296 | return self._attrs[name]
297 |
298 | def getNameByQName(self, name):
299 | if name not in self._attrs:
300 | raise KeyError(name)
301 | return name
302 |
303 | def getQNameByName(self, name):
304 | if name not in self._attrs:
305 | raise KeyError(name)
306 | return name
307 |
308 | def getNames(self):
309 | return list(self._attrs.keys())
310 |
311 | def getQNames(self):
312 | return list(self._attrs.keys())
313 |
314 | def __len__(self):
315 | return len(self._attrs)
316 |
317 | def __getitem__(self, name):
318 | return self._attrs[name]
319 |
320 | def keys(self):
321 | return list(self._attrs.keys())
322 |
323 | def __contains__(self, name):
324 | return name in self._attrs
325 |
326 | def get(self, name, alternative=None):
327 | return self._attrs.get(name, alternative)
328 |
329 | def copy(self):
330 | return self.__class__(self._attrs)
331 |
332 | def items(self):
333 | return list(self._attrs.items())
334 |
335 | def values(self):
336 | return list(self._attrs.values())
337 |
338 | # ===== ATTRIBUTESNSIMPL =====
339 |
340 | class AttributesNSImpl(AttributesImpl):
341 |
342 | def __init__(self, attrs, qnames):
343 | """NS-aware implementation.
344 |
345 | attrs should be of the form {(ns_uri, lname): value, ...}.
346 | qnames of the form {(ns_uri, lname): qname, ...}."""
347 | self._attrs = attrs
348 | self._qnames = qnames
349 |
350 | def getValueByQName(self, name):
351 | for (nsname, qname) in self._qnames.items():
352 | if qname == name:
353 | return self._attrs[nsname]
354 |
355 | raise KeyError(name)
356 |
357 | def getNameByQName(self, name):
358 | for (nsname, qname) in self._qnames.items():
359 | if qname == name:
360 | return nsname
361 |
362 | raise KeyError(name)
363 |
364 | def getQNameByName(self, name):
365 | return self._qnames[name]
366 |
367 | def getQNames(self):
368 | return list(self._qnames.values())
369 |
370 | def copy(self):
371 | return self.__class__(self._attrs, self._qnames)
372 |
373 |
374 | def _test():
375 | XMLReader()
376 | IncrementalParser()
377 | Locator()
378 |
379 | if __name__ == "__main__":
380 | _test()
381 |
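
A short usage sketch, not part of the file above: the expat driver returned by xml.sax.make_parser() implements the IncrementalParser interface defined here, so data can be pushed in chunks with feed()/close().

    # Sketch: incremental parsing through the IncrementalParser interface.
    import xml.sax

    class Counter(xml.sax.ContentHandler):
        def __init__(self):
            super().__init__()
            self.entries = 0
        def startElement(self, name, attrs):
            if name == "entry":
                self.entries += 1

    parser = xml.sax.make_parser()
    parser.setContentHandler(Counter())
    for chunk in (b"<lexicon><entry/>", b"<entry/></lexicon>"):
        parser.feed(chunk)          # XML constructs may span feed() calls
    parser.close()
    print(parser.getContentHandler().entries)   # 2
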
--------------------------------------------------------------------------------
/xpath/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from builtins import str
3 | from builtins import range
4 | from builtins import object
5 | from xpath.exceptions import *
6 | import xpath.exceptions
7 | import xpath.expr
8 | import xpath.parser
9 | import xpath.yappsrt
10 |
11 | __all__ = ['find', 'findnode', 'findvalue', 'XPathContext', 'XPath']
12 | __all__.extend((x for x in dir(xpath.exceptions) if not x.startswith('_')))
13 |
14 | def api(f):
15 | """Decorator for functions and methods that are part of the external
16 | module API and that can throw XPathError exceptions.
17 |
18 | The call stack for these exceptions can be very large, and not very
19 | interesting to the user. This decorator rethrows XPathErrors to
20 | trim the stack.
21 |
22 | """
23 | def api_function(*args, **kwargs):
24 | try:
25 | return f(*args, **kwargs)
26 | except XPathError as e:
27 | raise e
28 | api_function.__name__ = f.__name__
29 | api_function.__doc__ = f.__doc__
30 | return api_function
31 |
32 | class XPathContext(object):
33 | def __init__(self, document=None, **kwargs):
34 | self.default_namespace = None
35 | self.namespaces = {}
36 | self.variables = {}
37 |
38 | if document is not None:
39 | if document.nodeType != document.DOCUMENT_NODE:
40 | document = document.ownerDocument
41 | if document.documentElement is not None:
42 | attrs = document.documentElement.attributes
43 | for attr in (attrs.item(i) for i in range(attrs.length)):
44 | if attr.name == 'xmlns':
45 | self.default_namespace = attr.value
46 | elif attr.name.startswith('xmlns:'):
47 | self.namespaces[attr.name[6:]] = attr.value
48 |
49 | self.update(**kwargs)
50 |
51 | def clone(self):
52 | dup = XPathContext()
53 | dup.default_namespace = self.default_namespace
54 | dup.namespaces.update(self.namespaces)
55 | dup.variables.update(self.variables)
56 | return dup
57 |
58 | def update(self, default_namespace=None, namespaces=None,
59 | variables=None, **kwargs):
60 | if default_namespace is not None:
61 | self.default_namespace = default_namespace
62 | if namespaces is not None:
63 | self.namespaces = namespaces
64 | if variables is not None:
65 | self.variables = variables
66 | self.variables.update(kwargs)
67 |
68 | @api
69 | def find(self, expr, node, **kwargs):
70 | return xpath.find(expr, node, context=self, **kwargs)
71 |
72 | @api
73 | def findnode(self, expr, node, **kwargs):
74 | return xpath.findnode(expr, node, context=self, **kwargs)
75 |
76 | @api
77 | def findvalue(self, expr, node, **kwargs):
78 | return xpath.findvalue(expr, node, context=self, **kwargs)
79 |
80 | @api
81 | def findvalues(self, expr, node, **kwargs):
82 | return xpath.findvalues(expr, node, context=self, **kwargs)
83 |
84 | class XPath(object):
85 | _max_cache = 100
86 | _cache = {}
87 |
88 | def __init__(self, expr):
89 |         """Compile the XPath expression string into an expression tree,
90 |         raising XPathParseError if it cannot be parsed."""
91 | try:
92 | parser = xpath.parser.XPath(xpath.parser.XPathScanner(str(expr)))
93 | self.expr = parser.XPath()
94 | except xpath.yappsrt.SyntaxError as e:
95 | raise XPathParseError(str(expr), e.pos, e.msg)
96 |
97 | @classmethod
98 | def get(cls, s):
99 | if isinstance(s, cls):
100 | return s
101 | try:
102 | return cls._cache[s]
103 | except KeyError:
104 | if len(cls._cache) > cls._max_cache:
105 | cls._cache.clear()
106 | expr = cls(s)
107 | cls._cache[s] = expr
108 | return expr
109 |
110 | @api
111 | def find(self, node, context=None, **kwargs):
112 | if context is None:
113 | context = XPathContext(node, **kwargs)
114 | elif kwargs:
115 | context = context.clone()
116 | context.update(**kwargs)
117 | return self.expr.evaluate(node, 1, 1, context)
118 |
119 | @api
120 | def findnode(self, node, context=None, **kwargs):
121 | result = self.find(node, context, **kwargs)
122 | if not xpath.expr.nodesetp(result):
123 | raise XPathTypeError("expression is not a node-set")
124 | if len(result) == 0:
125 | return None
126 | return result[0]
127 |
128 | @api
129 | def findvalue(self, node, context=None, **kwargs):
130 | result = self.find(node, context, **kwargs)
131 | if xpath.expr.nodesetp(result):
132 | if len(result) == 0:
133 | return None
134 | result = xpath.expr.string(result)
135 | return result
136 |
137 | @api
138 | def findvalues(self, node, context=None, **kwargs):
139 | result = self.find(node, context, **kwargs)
140 | if not xpath.expr.nodesetp(result):
141 | raise XPathTypeError("expression is not a node-set")
142 | return [xpath.expr.string_value(x) for x in result]
143 |
144 | def __repr__(self):
145 | return '%s.%s(%s)' % (self.__class__.__module__,
146 | self.__class__.__name__,
147 | repr(str(self.expr)))
148 |
149 | def __str__(self):
150 | return str(self.expr)
151 |
152 | @api
153 | def find(expr, node, **kwargs):
154 | return XPath.get(expr).find(node, **kwargs)
155 |
156 | @api
157 | def findnode(expr, node, **kwargs):
158 | return XPath.get(expr).findnode(node, **kwargs)
159 |
160 | @api
161 | def findvalue(expr, node, **kwargs):
162 | return XPath.get(expr).findvalue(node, **kwargs)
163 |
164 | @api
165 | def findvalues(expr, node, **kwargs):
166 | return XPath.get(expr).findvalues(node, **kwargs)
167 |
--------------------------------------------------------------------------------
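A short usage sketch of the module-level API defined above, assuming a DOM built with xml.dom.minidom (the document and expressions are illustrative):

    import xml.dom.minidom
    import xpath

    doc = xml.dom.minidom.parseString(
        '<entry id="e1"><form lang="id"><t>rumah</t></form></entry>')

    # find() returns the raw result (a node-set here), findnode() the first
    # matching node or None, and findvalue() the string-value of the result.
    forms = xpath.find('//form[@lang="id"]', doc)
    first = xpath.findnode('//form', doc)
    word = xpath.findvalue('//form/t', doc)      # 'rumah'

    # Variables referenced as $name are bound through keyword arguments;
    # an XPathContext carries namespace and variable bindings across calls.
    word2 = xpath.findvalue('//form[@lang = $lang]/t', doc, lang='id')
    ctx = xpath.XPathContext(doc)
    word3 = ctx.findvalue('//form/t', doc)
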
/xpath/exceptions.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | class XPathError(Exception):
4 | """Base exception class used for all XPath exceptions."""
5 |
6 | class XPathNotImplementedError(XPathError):
7 | """Raised when an XPath expression contains a feature of XPath which
8 | has not been implemented.
9 |
10 | """
11 |
12 | class XPathParseError(XPathError):
13 | """Raised when an XPath expression could not be parsed."""
14 |
15 | def __init__(self, expr, pos, message):
16 | XPathError.__init__(self)
17 | self.expr = expr
18 | self.pos = pos
19 | self.err = message
20 |
21 | def __str__(self):
22 | return ("Syntax error:\n" +
23 | self.expr.replace("\n", " ") + "\n" +
24 | ("-" * self.pos) + "^")
25 |
26 | class XPathTypeError(XPathError):
27 | """Raised when an XPath expression is found to contain a type error.
28 | For example, the expression "string()/node()" contains a type error
29 | because the "string()" function does not return a node-set.
30 |
31 | """
32 |
33 | class XPathUnknownFunctionError(XPathError):
34 | """Raised when an XPath expression contains a function that has no
35 | binding in the expression context.
36 |
37 | """
38 |
39 | class XPathUnknownPrefixError(XPathError):
40 | """Raised when an XPath expression contains a QName with a namespace
41 | prefix that has no corresponding namespace declaration in the expression
42 | context.
43 |
44 | """
45 |
46 | class XPathUnknownVariableError(XPathError):
47 | """Raised when an XPath expression contains a variable that has no
48 | binding in the expression context.
49 |
50 | """
51 |
--------------------------------------------------------------------------------
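A quick sketch of how these exceptions surface to callers of the xpath package (the deliberately malformed expression is illustrative):

    import xpath
    from xpath.exceptions import XPathError, XPathParseError

    try:
        xpath.XPath('//form[@lang =')      # predicate is never closed
    except XPathParseError as err:
        print(err)     # renders the expression with a caret at the error offset
    except XPathError:
        pass           # any other XPath problem derives from XPathError
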
/xpath/parser.g:
--------------------------------------------------------------------------------
1 | import xpath.expr as X
2 | from xpath.yappsrt import *
3 |
4 | %%
5 |
6 | parser XPath:
7 | option: 'no-support-module'
8 |
9 | ignore: r'\s+'
10 | token END: r'$'
11 |
12 | token FORWARD_AXIS_NAME:
13 | r'child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace'
14 | token REVERSE_AXIS_NAME:
15 | r'parent|preceding-sibling|preceding|ancestor-or-self|ancestor'
16 |
17 | # Dire hack here, since yapps2 has only one token of lookahead: NCNAME
18 |     # does not match when followed by an open paren.
19 | token NCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*(?!\()'
20 | token FUNCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*'
21 |
22 | token DQUOTE: r'\"(?:[^\"])*\"'
23 | token SQUOTE: r"\'(?:[^\'])*\'"
24 | token NUMBER: r'((\.[0-9]+)|([0-9]+(\.[0-9]*)?))([eE][\+\-]?[0-9]+)?'
25 | token EQ_COMP: r'\!?\='
26 | token REL_COMP: r'[\<\>]\=?'
27 | token ADD_COMP: r'[\+\-]'
28 | token MUL_COMP: r'\*|div|mod'
29 |
30 | rule XPath:
31 | Expr END {{ return Expr }}
32 |
33 | rule Expr:
34 | OrExpr {{ return OrExpr }}
35 |
36 | rule OrExpr:
37 | AndExpr {{ Expr = AndExpr }}
38 | (
39 | r'or' AndExpr
40 | {{ Expr = X.OrExpr('or', Expr, AndExpr) }}
41 | )* {{ return Expr }}
42 |
43 | rule AndExpr:
44 | EqualityExpr {{ Expr = EqualityExpr }}
45 | (
46 | r'and' EqualityExpr
47 | {{ Expr = X.AndExpr('and', Expr, EqualityExpr) }}
48 | )* {{ return Expr }}
49 |
50 | rule EqualityExpr:
51 | RelationalExpr {{ Expr = RelationalExpr }}
52 | (
53 | EQ_COMP
54 | RelationalExpr
55 | {{ Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr) }}
56 | )* {{ return Expr }}
57 |
58 | rule RelationalExpr:
59 | AdditiveExpr {{ Expr = AdditiveExpr }}
60 | (
61 | REL_COMP
62 | AdditiveExpr
63 | {{ Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr) }}
64 | )* {{ return Expr }}
65 |
66 | rule AdditiveExpr:
67 | MultiplicativeExpr {{ Expr = MultiplicativeExpr }}
68 | (
69 | ADD_COMP
70 | MultiplicativeExpr
71 | {{ Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr) }}
72 | )* {{ return Expr }}
73 |
74 | rule MultiplicativeExpr:
75 | UnionExpr {{ Expr = UnionExpr }}
76 | (
77 | MUL_COMP
78 | UnionExpr
79 | {{ Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr) }}
80 | )* {{ return Expr }}
81 |
82 | rule UnionExpr:
83 | UnaryExpr {{ Expr = UnaryExpr }}
84 | (
85 | '\|' UnaryExpr
86 | {{ Expr = X.UnionExpr('|', Expr, UnaryExpr) }}
87 | )* {{ return Expr }}
88 |
89 | rule UnaryExpr:
90 | r'\-' ValueExpr {{ return X.NegationExpr(ValueExpr) }}
91 | | ValueExpr {{ return ValueExpr }}
92 |
93 | rule ValueExpr:
94 | PathExpr {{ return PathExpr }}
95 |
96 | rule PathExpr:
97 | r'\/' {{ path = None }}
98 | [
99 | RelativePathExpr {{ path = RelativePathExpr }}
100 | ] {{ return X.AbsolutePathExpr(path) }}
101 | | r'\/\/' RelativePathExpr
102 | {{ step = X.AxisStep('descendant-or-self') }}
103 | {{ RelativePathExpr.steps.insert(0, step) }}
104 | {{ return X.AbsolutePathExpr(RelativePathExpr) }}
105 | | RelativePathExpr {{ return RelativePathExpr }}
106 |
107 | rule RelativePathExpr:
108 | StepExpr {{ steps = [StepExpr] }}
109 | (
110 | (
111 | r'\/'
112 | | r'\/\/'
113 | {{ steps.append(X.AxisStep('descendant-or-self')) }}
114 | )
115 | StepExpr {{ steps.append(StepExpr) }}
116 | )*
117 | {{ return X.PathExpr(steps) }}
118 |
119 | rule StepExpr:
120 | AxisStep {{ return AxisStep }}
121 | | FilterExpr {{ return FilterExpr }}
122 |
123 | rule AxisStep:
124 | (
125 | ForwardStep {{ step = ForwardStep }}
126 | | ReverseStep {{ step = ReverseStep }}
127 | ) {{ expr = X.AxisStep(*step) }}
128 | [
129 | PredicateList
130 | {{ expr = X.PredicateList(expr, PredicateList, step[0]) }}
131 | ]
132 | {{ return expr }}
133 |
134 | rule ForwardStep:
135 | ForwardAxis NodeTest {{ return [ForwardAxis, NodeTest] }}
136 | | AbbrevForwardStep {{ return AbbrevForwardStep }}
137 |
138 | rule ForwardAxis:
139 | FORWARD_AXIS_NAME r'::' {{ return FORWARD_AXIS_NAME }}
140 |
141 | rule AbbrevForwardStep:
142 | {{ axis = 'child' }}
143 | [
144 | r'@' {{ axis = 'attribute' }}
145 | ]
146 | NodeTest {{ return [axis, NodeTest] }}
147 |
148 | rule ReverseStep:
149 | ReverseAxis NodeTest {{ return [ReverseAxis, NodeTest] }}
150 | | AbbrevReverseStep {{ return AbbrevReverseStep }}
151 |
152 | rule ReverseAxis:
153 | REVERSE_AXIS_NAME r'::' {{ return REVERSE_AXIS_NAME }}
154 |
155 | rule AbbrevReverseStep:
156 | r'\.\.' {{ return ['parent', None] }}
157 |
158 | rule NodeTest:
159 | KindTest {{ return KindTest }}
160 | | NameTest {{ return NameTest }}
161 |
162 | rule NameTest:
163 |         # We also support the XPath 2.0 "*:name" wildcard form.
164 | {{ prefix = None }}
165 | WildcardOrNCName {{ localpart = WildcardOrNCName }}
166 | [
167 | r':' WildcardOrNCName {{ prefix = localpart }}
168 | {{ localpart = WildcardOrNCName }}
169 | ]
170 | {{ return X.NameTest(prefix, localpart) }}
171 |
172 | rule WildcardOrNCName:
173 | r'\*' {{ return '*' }}
174 | | NCNAME {{ return NCNAME }}
175 |
176 | rule FilterExpr:
177 | PrimaryExpr
178 | [
179 | PredicateList
180 | {{ PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList) }}
181 | ] {{ return PrimaryExpr }}
182 |
183 | rule PredicateList:
184 | Predicate {{ predicates = [Predicate] }}
185 | (
186 | Predicate {{ predicates.append(Predicate) }}
187 | )* {{ return predicates }}
188 |
189 | rule Predicate:
190 | r'\[' Expr r'\]' {{ return Expr }}
191 |
192 | rule PrimaryExpr:
193 | Literal {{ return X.LiteralExpr(Literal) }}
194 | | VariableReference {{ return VariableReference }}
195 | | r'\(' Expr r'\)' {{ return Expr }}
196 | | ContextItemExpr {{ return ContextItemExpr }}
197 | | FunctionCall {{ return FunctionCall }}
198 |
199 | rule VariableReference:
200 | r'\$' QName
201 | {{ return X.VariableReference(*QName) }}
202 |
203 | rule ContextItemExpr:
204 | r'\.' {{ return X.AxisStep('self') }}
205 |
206 | rule FunctionCall:
207 | FUNCNAME r'\(' {{ args = [] }}
208 | [
209 | Expr {{ args.append(Expr) }}
210 | (
211 | r'\,' Expr {{ args.append(Expr) }}
212 | )*
213 | ] r'\)' {{ return X.Function(FUNCNAME, args) }}
214 |
215 | rule KindTest:
216 | PITest {{ return PITest }}
217 | | CommentTest {{ return CommentTest }}
218 | | TextTest {{ return TextTest }}
219 | | AnyKindTest {{ return AnyKindTest }}
220 |
221 | rule PITest:
222 | r'processing-instruction' {{ name = None }}
223 | r'\(' [
224 | NCNAME {{ name = NCNAME }}
225 | | StringLiteral {{ name = StringLiteral }}
226 | ] r'\)' {{ return X.PITest(name) }}
227 |
228 | rule CommentTest:
229 | r'comment' r'\(' r'\)' {{ return X.CommentTest() }}
230 |
231 | rule TextTest:
232 | r'text' r'\(' r'\)' {{ return X.TextTest() }}
233 |
234 | rule AnyKindTest:
235 | r'node' r'\(' r'\)' {{ return X.AnyKindTest() }}
236 |
237 | rule Literal:
238 | NumericLiteral {{ return NumericLiteral }}
239 | | StringLiteral {{ return StringLiteral }}
240 |
241 | rule NumericLiteral:
242 | NUMBER {{ return float(NUMBER) }}
243 |
244 | rule StringLiteral:
245 | DQUOTE {{ return DQUOTE[1:-1] }}
246 | | SQUOTE {{ return SQUOTE[1:-1] }}
247 |
248 | rule QName:
249 | NCNAME {{ name = NCNAME }}
250 | [
251 | r'\:' NCNAME {{ return (name, NCNAME) }}
252 | ] {{ return (None, name) }}
253 |
--------------------------------------------------------------------------------
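The cascade of rules above (OrExpr down to UnionExpr) is what encodes XPath 1.0 operator precedence. A hedged sketch of the resulting behaviour, evaluated through the package's public API against a trivial document:

    import xml.dom.minidom
    import xpath

    doc = xml.dom.minidom.parseString('<r/>')

    # '*' (MultiplicativeExpr) binds tighter than '+' (AdditiveExpr) ...
    assert xpath.find('1 + 2 * 3', doc) == 7.0
    assert xpath.find('(1 + 2) * 3', doc) == 9.0
    # ... and both bind tighter than the comparison operators.
    assert xpath.find('2 = 1 + 1', doc)
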
/xpath/readme.txt:
--------------------------------------------------------------------------------
1 | This module was copied unmodified from py-dom-xpath, which I downloaded here:
2 | http://py-dom-xpath.googlecode.com/
3 |
4 | It has a bug that prevents it from properly handling XML elements named "text", which are crucial in LIFT files.
5 | 
6 | This should be fixed, but for now a workaround is to make a temp copy in which those elements are renamed,
7 | and run XPaths against that copy.
8 |
--------------------------------------------------------------------------------
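A hedged sketch of one way to implement the temp-copy workaround described above, assuming the troublesome elements are LIFT's <text> elements; the placeholder name and the helper are illustrative, not code from this repository:

    import re
    import xml.dom.minidom
    import xpath

    PLACEHOLDER = 'textx'   # any name that is not an XPath node-test keyword

    def find_in_lift(expr, lift_xml):
        """Rename <text> elements in a temporary copy of the XML (and in the
        expression, leaving the text() node test alone), then query that."""
        patched_xml = re.sub(r'<(/?)text\b', r'<\g<1>' + PLACEHOLDER, lift_xml)
        patched_expr = re.sub(r'\btext\b(?!\s*\()', PLACEHOLDER, expr)
        doc = xml.dom.minidom.parseString(patched_xml)
        return xpath.find(patched_expr, doc)
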
/xpath/yappsrt.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Yapps 2.0 Runtime
4 | #
5 | # This module is needed to run generated parsers.
6 |
7 | from builtins import object
8 | import re
9 |
10 | class SyntaxError(Exception):
11 | """When we run into an unexpected token, this is the exception to use"""
12 | def __init__(self, pos=-1, msg="Bad Token"):
13 | Exception.__init__(self)
14 | self.pos = pos
15 | self.msg = msg
16 | def __repr__(self):
17 |         if self.pos < 0: return "#<syntax-error>"
18 | else: return "SyntaxError[@ char %s: %s]" % (repr(self.pos), self.msg)
19 |
20 | class NoMoreTokens(Exception):
21 | """Another exception object, for when we run out of tokens"""
22 | pass
23 |
24 | class Scanner(object):
25 | def __init__(self, patterns, ignore, input):
26 | """Patterns is [(terminal,regex)...]
27 | Ignore is [terminal,...];
28 | Input is a string"""
29 | self.tokens = []
30 | self.restrictions = []
31 | self.input = input
32 | self.pos = 0
33 | self.ignore = ignore
34 | # The stored patterns are a pair (compiled regex,source
35 | # regex). If the patterns variable passed in to the
36 | # constructor is None, we assume that the class already has a
37 | # proper .patterns list constructed
38 | if patterns is not None:
39 | self.patterns = []
40 | for k, r in patterns:
41 | self.patterns.append( (k, re.compile(r)) )
42 |
43 | def token(self, i, restrict=0):
44 | """Get the i'th token, and if i is one past the end, then scan
45 | for another token; restrict is a list of tokens that
46 | are allowed, or 0 for any token."""
47 | if i == len(self.tokens): self.scan(restrict)
48 | if i < len(self.tokens):
49 | # Make sure the restriction is more restricted
50 | if restrict and self.restrictions[i]:
51 | for r in restrict:
52 | if r not in self.restrictions[i]:
53 | raise NotImplementedError("Unimplemented: restriction set changed")
54 | return self.tokens[i]
55 | raise NoMoreTokens()
56 |
57 | def __repr__(self):
58 | """Print the last 10 tokens that have been scanned in"""
59 | output = ''
60 | for t in self.tokens[-10:]:
61 | output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3]))
62 | return output
63 |
64 | def scan(self, restrict):
65 | """Should scan another token and add it to the list, self.tokens,
66 | and add the restriction to self.restrictions"""
67 | # Keep looking for a token, ignoring any in self.ignore
68 | while 1:
69 | # Search the patterns for the longest match, with earlier
70 | # tokens in the list having preference
71 | best_match = -1
72 | best_pat = '(error)'
73 | for p, regexp in self.patterns:
74 | # First check to see if we're ignoring this token
75 | if restrict and p not in restrict and p not in self.ignore:
76 | continue
77 | m = regexp.match(self.input, self.pos)
78 | if m and len(m.group(0)) > best_match:
79 | # We got a match that's better than the previous one
80 | best_pat = p
81 | best_match = len(m.group(0))
82 |
83 | # If we didn't find anything, raise an error
84 | if best_pat == '(error)' and best_match < 0:
85 | msg = "Bad Token"
86 | if restrict:
87 | msg = "Trying to find one of "+", ".join(restrict)
88 | raise SyntaxError(self.pos, msg)
89 |
90 | # If we found something that isn't to be ignored, return it
91 | if best_pat not in self.ignore:
92 | # Create a token with this data
93 | token = (self.pos, self.pos+best_match, best_pat,
94 | self.input[self.pos:self.pos+best_match])
95 | self.pos = self.pos + best_match
96 | # Only add this token if it's not in the list
97 | # (to prevent looping)
98 | if not self.tokens or token != self.tokens[-1]:
99 | self.tokens.append(token)
100 | self.restrictions.append(restrict)
101 | return
102 | else:
103 | # This token should be ignored ..
104 | self.pos = self.pos + best_match
105 |
106 | class Parser(object):
107 | def __init__(self, scanner):
108 | self._scanner = scanner
109 | self._pos = 0
110 |
111 | def _peek(self, *types):
112 | """Returns the token type for lookahead; if there are any args
113 | then the list of args is the set of token types to allow"""
114 | tok = self._scanner.token(self._pos, types)
115 | return tok[2]
116 |
117 | def _scan(self, type):
118 | """Returns the matched text, and moves to the next token"""
119 | tok = self._scanner.token(self._pos, [type])
120 | if tok[2] != type:
121 | raise SyntaxError(tok[0], 'Trying to find '+type)
122 | self._pos = 1+self._pos
123 | return tok[3]
124 |
125 |
126 |
127 | def print_error(input, err, scanner):
128 | """This is a really dumb long function to print error messages nicely."""
129 | p = err.pos
130 | # Figure out the line number
131 |     line = input[:p].count('\n')
132 | print(err.msg+" on line "+repr(line+1)+":")
133 | # Now try printing part of the line
134 | text = input[max(p-80, 0):p+80]
135 | p = p - max(p-80, 0)
136 |
137 | # Strip to the left
138 | i = text[:p].rfind('\n')
139 | j = text[:p].rfind('\r')
140 | if i < 0 or (0 <= j < i): i = j
141 | if 0 <= i < p:
142 | p = p - i - 1
143 | text = text[i+1:]
144 |
145 | # Strip to the right
146 | i = text.find('\n', p)
147 | j = text.find('\r', p)
148 | if i < 0 or (0 <= j < i): i = j
149 | if i >= 0:
150 | text = text[:i]
151 |
152 | # Now shorten the text
153 | while len(text) > 70 and p > 60:
154 | # Cut off 10 chars
155 | text = "..." + text[10:]
156 | p = p - 7
157 |
158 | # Now print the string, along with an indicator
159 | print('> ',text)
160 | print('> ',' '*p + '^')
161 | print('List of nearby tokens:', scanner)
162 |
163 | def wrap_error_reporter(parser, rule):
164 |     return_value = None  # returned unchanged if parsing fails below
165 |     try: return_value = getattr(parser, rule)()
166 | except SyntaxError as s:
167 | input = parser._scanner.input
168 | try:
169 | print_error(input, s, parser._scanner)
170 | except ImportError:
171 |             print('Syntax Error', s.msg, 'on line', 1 + input[:s.pos].count('\n'))
172 | except NoMoreTokens:
173 | print('Could not complete parsing; stopped around here:')
174 | print(parser._scanner)
175 | return return_value
176 |
--------------------------------------------------------------------------------
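For orientation, a hedged sketch of how the generated parser in xpath/parser.py drives this runtime (the class and rule names are the ones used by xpath/__init__.py above; the expression is illustrative):

    import xpath.parser
    import xpath.yappsrt

    source = 'child::form[@lang = "id"]'
    scanner = xpath.parser.XPathScanner(source)   # a Scanner subclass
    parser = xpath.parser.XPath(scanner)          # a Parser subclass

    try:
        tree = parser.XPath()                     # start rule; returns an expr object
    except xpath.yappsrt.SyntaxError as err:
        # err.pos is a character offset into `source`; print_error() points at it.
        xpath.yappsrt.print_error(source, err, scanner)
    else:
        print(tree)                               # str() of the compiled expression
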