├── tests
    ├── __init__.py
    ├── test_parser.py
    ├── test_get_namespaces.py
    └── test.xml
├── setup.py
├── .vscode
    └── settings.json
├── README.md
├── pyproject.toml
├── LICENSE
├── .github
    └── workflows
    │   └── publish_on_pipy.yml
├── .gitignore
└── src
    └── schema_st4_parser
        └── __init__.py


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup,find_packages
2 | import os
3 | 
# The version is taken from the PACKAGE_VERSION environment variable;
# "0.0.0" is used when the variable is not set.
package_version = os.environ.get("PACKAGE_VERSION", "0.0.0")

# Packages are discovered below ./src; anything matching a test-package
# pattern is left out of the distribution.
excluded_packages = ("*.tests", "*.tests.*", "tests.*", "tests")
source_packages = find_packages(where="./src", exclude=excluded_packages)

setup(
    version=package_version,
    package_dir={"": "src"},
    packages=source_packages,
)


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "python.testing.unittestArgs": [
 3 |         "-v",
 4 |         "-s",
 5 |         "./tests",
 6 |         "-p",
 7 |         "test_*.py"
 8 |     ],
 9 |     "python.testing.pytestEnabled": true,
10 |     "python.testing.unittestEnabled": false,
11 |     "python.testing.pytestArgs": [
12 |         "tests"
13 |     ]
14 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Schema ST4 Python Parser
 2 | 
 3 | Parse [Schema ST4](https://www.quanos-content-solutions.com/en/software/schema-st4) XML files into simple, flat Python objects.
 4 | 
 5 | ## Installation
 6 | 
 7 | via pip: `pip install schema-st4-parser`
 8 | 
 9 | ## Usage
10 | Simply pass the XML file to the `parse` function; it returns a list of `St4Entry` objects.
11 | 
12 | ```python
13 | from schema_st4_parser import parse, St4Entry
14 | 
15 | entries = parse("MyFile.xml")
16 | print(entries[0])
17 | ```
18 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools", "setuptools-scm"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "schema_st4_parser"
 7 | dynamic = ["version"]
 8 | authors = [
 9 |   { name="Lukas Kreussel"},
10 | ]
11 | description = "Parse Schema ST4 XML files into Python objects"
12 | readme = "README.md"
13 | license = { file="LICENSE" }
14 | requires-python = ">=3.8"
15 | classifiers = [
16 |     "Programming Language :: Python :: 3",
17 |     "License :: OSI Approved :: MIT License",
18 |     "Operating System :: OS Independent",
19 | ]
20 | 
21 | [project.urls]
22 | "Homepage" = "https://github.com/LLukas22/ST4-Python-Parser"
23 | "Bug Tracker" = "https://github.com/LLukas22/ST4-Python-Parser/issues"


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
 1 | from src.schema_st4_parser import St4Entry, parse, get_namespaces
 2 | 
 3 | def test_can_parse_file():
 4 |     results = parse("./tests/test.xml")
 5 |     assert len(results) == 1
 6 |     result = results[0]
 7 |     assert isinstance(result, St4Entry)
 8 |     assert result.label == "label"
 9 |     assert result.node_id == "nID"
10 |     assert result.link_id == "lID"
11 |     assert result.titles["en"] == "title_en"
12 |     assert result.titles["de"] == "title_de"
13 |     assert "en" in result.content
14 |     assert "de" in result.content
15 |     assert result.thumbnail == "thumbnail"
16 |     assert "GraficResource" in result.type
17 |     assert result.data_web["en"] == "data_web_en"
18 |     assert result.data_web["de"] == "data_web_de"
19 |     assert result.data_web_data["en"] == "data_web_data_en"
20 |     assert result.data_web_data["de"] == "data_web_data_de"
21 |     
22 |     
23 |     


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 LLukas22
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/.github/workflows/publish_on_pipy.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package to PyPi
10 | 
11 | on:   
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - name: checkout
25 |       uses: actions/checkout@v3
26 |       
27 |     - id: get_version
28 |       uses: battila7/get-version-action@v2
29 |         
30 |     - name: print version
31 |       run: echo ${{ steps.get_version.outputs.version-without-v }}
32 |       
33 |     - name: update env
34 |       run: echo "PACKAGE_VERSION=${{ steps.get_version.outputs.version-without-v }}" >> $GITHUB_ENV
35 |         
36 |     - name: print package version
37 |       run: echo $PACKAGE_VERSION
38 |       
39 |     - name: Set up Python
40 |       uses: actions/setup-python@v3
41 |       with:
42 |         python-version: '3.10'
43 |     - name: Install dependencies
44 |       run: |
45 |         python -m pip install --upgrade pip
46 |         pip install build    
47 |     - name: Build Package
48 |       run: python -m build
49 |     - name: Publish Package
50 |       uses: pypa/gh-action-pypi-publish@v1.5.1
51 |       with:
52 |         user: __token__
53 |         password: ${{ secrets.PYPI_TOKEN }}


--------------------------------------------------------------------------------
/tests/test_get_namespaces.py:
--------------------------------------------------------------------------------
 1 | from src.schema_st4_parser import get_namespaces
 2 | import io
 3 | import pytest
 4 | 
 5 | def test_should_parse_namespaces_from_valid_input():
 6 |     valid_string = '''
 7 |     <d:xie d:version="12.0.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:n="http://www.schema.de/2004/ST4/XmlImportExport/Node" xmlns:d="http://www.schema.de/2004/ST4/XmlImportExport/Data" xmlns:l="http://www.schema.de/2004/ST4/XmlImportExport/Link" xmlns:m="http://www.schema.de/2004/ST4/XmlImportExport/Meta" m:Dimension-GuiLanguage="de" m:Dimension-Sprache="de" xsi:schemaLocation="http://www.schema.de/2004/ST4/XmlImportExport/Node schemas/node.xsd http://www.schema.de/2004/ST4/XmlImportExport/Data schemas/data.xsd http://www.schema.de/2004/ST4/XmlImportExport/Link schemas/link.xsd">
 8 |     </d:xie>
 9 |     '''
10 |     file = io.StringIO(valid_string)
11 |     result = get_namespaces(file)
12 |     assert "n" in result  
13 |     assert "l" in result
14 |     assert "d" in result
15 |     assert "xsi" in result
16 |     
def test_should_raise_exception_from_invalid_input():
    """Malformed XML (the root element is never closed) must make get_namespaces raise."""
    # Local import: only this test needs the concrete parser error type.
    from xml.etree.ElementTree import ParseError

    invalid_string = '''
    <d:xie d:version="12.0.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:n="http://www.schema.de/2004/ST4/XmlImportExport/Node" xmlns:d="http://www.schema.de/2004/ST4/XmlImportExport/Data" xmlns:l="http://www.schema.de/2004/ST4/XmlImportExport/Link" xmlns:m="http://www.schema.de/2004/ST4/XmlImportExport/Meta" m:Dimension-GuiLanguage="de" m:Dimension-Sprache="de" xsi:schemaLocation="http://www.schema.de/2004/ST4/XmlImportExport/Node schemas/node.xsd http://www.schema.de/2004/ST4/XmlImportExport/Data schemas/data.xsd http://www.schema.de/2004/ST4/XmlImportExport/Link schemas/link.xsd">
    '''
    file = io.StringIO(invalid_string)
    # Expect the parser's own error type rather than any Exception, so that an
    # unrelated failure (e.g. a TypeError inside the function) cannot make the
    # test pass by accident.
    with pytest.raises(ParseError):
        get_namespaces(file)
24 | 


--------------------------------------------------------------------------------
/tests/test.xml:
--------------------------------------------------------------------------------
 1 | <d:xie d:version="12.0.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:n="http://www.schema.de/2004/ST4/XmlImportExport/Node" xmlns:d="http://www.schema.de/2004/ST4/XmlImportExport/Data" xmlns:l="http://www.schema.de/2004/ST4/XmlImportExport/Link" xmlns:m="http://www.schema.de/2004/ST4/XmlImportExport/Meta" m:Dimension-GuiLanguage="de" m:Dimension-Sprache="de" xsi:schemaLocation="http://www.schema.de/2004/ST4/XmlImportExport/Node schemas/node.xsd http://www.schema.de/2004/ST4/XmlImportExport/Data schemas/data.xsd http://www.schema.de/2004/ST4/XmlImportExport/Link schemas/link.xsd">
 2 |  <n:SystemFolder n:Id="" l:Id="" l:Sort="" l:Label="">
 3 |     <n:Folder n:Id="" l:Id="" l:Sort="" l:Label="">
 4 |         <n:ResourceFolder n:Id="" l:Id="" l:Sort="" l:Label="">
 5 |           <n:GraficResource n:Id="nID" l:Id="lID" n:trans.EditLanguage="" l:Label="label">
 6 |             <n:Data-Title>
 7 |                   <n:Value n:Aspect="en">
 8 |                     <n:Entry n:Key="1">title_en</n:Entry>
 9 |                   </n:Value>
10 |                   <n:Value n:Aspect="de">
11 |                     <n:Entry n:Key="1">title_de</n:Entry>
12 |                   </n:Value>
13 |             </n:Data-Title>
14 |             <n:Data-Content>
15 |                 <n:Value n:Aspect="en">
16 |                 <n:Entry n:Key="1"><content>
17 |                     <p>English Content</p>
18 |                     </content></n:Entry>
19 |                 </n:Value>
20 |                 <n:Value n:Aspect="de">
21 |                 <n:Entry n:Key="1"><content>
22 |                     <p>German Content</p>
23 |                     </content></n:Entry>
24 |                 </n:Value>
25 |             </n:Data-Content>
26 |             <n:Data-Thumbnail>thumbnail</n:Data-Thumbnail>
27 |             <n:Data-Web>
28 |               <n:Value n:Aspect="en">data_web_en</n:Value>
29 |               <n:Value n:Aspect="de">data_web_de</n:Value>
30 |             </n:Data-Web>
31 |             <n:Data-Web.Data>
32 |               <n:Value n:Aspect="en">data_web_data_en</n:Value>
33 |               <n:Value n:Aspect="de">data_web_data_de</n:Value>
34 |             </n:Data-Web.Data>
35 |           </n:GraficResource>
36 |         </n:ResourceFolder>
37 |     </n:Folder>
38 |   </n:SystemFolder>
39 | </d:xie>


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | schemas/
  2 | streams/
  3 | T450_Betriebsanleitung.xml
  4 | main.py
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | cover/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | .pybuilder/
 80 | target/
 81 | 
 82 | # Jupyter Notebook
 83 | .ipynb_checkpoints
 84 | 
 85 | # IPython
 86 | profile_default/
 87 | ipython_config.py
 88 | 
 89 | # pyenv
 90 | #   For a library or package, you might want to ignore these files since the code is
 91 | #   intended to run in multiple environments; otherwise, check them in:
 92 | # .python-version
 93 | 
 94 | # pipenv
 95 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 96 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 97 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 98 | #   install all needed dependencies.
 99 | #Pipfile.lock
100 | 
101 | # poetry
102 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
104 | #   commonly ignored for libraries.
105 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 | 
108 | # pdm
109 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | #   in version control.
113 | #   https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 | 
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 | 
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 | 
123 | # SageMath parsed files
124 | *.sage.py
125 | 
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 | 
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 | 
139 | # Rope project settings
140 | .ropeproject
141 | 
142 | # mkdocs documentation
143 | /site
144 | 
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 | 
150 | # Pyre type checker
151 | .pyre/
152 | 
153 | # pytype static type analyzer
154 | .pytype/
155 | 
156 | # Cython debug symbols
157 | cython_debug/
158 | 
159 | # PyCharm
160 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
163 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | #.idea/
165 | 
166 | 


--------------------------------------------------------------------------------
/src/schema_st4_parser/__init__.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Dict, Optional, Union
  2 | from dataclasses import dataclass
  3 | import xml.etree.ElementTree as ET
  4 | import os
  5 | 
  6 | @dataclass(frozen=True)
  7 | class St4Entry():
  8 |     type:str
  9 |     label:str
 10 |     node_id:str
 11 |     link_id:str
 12 |     titles:Dict[str,str]
 13 |     content:Dict[str,str]
 14 |     thumbnail:Optional[str]=None
 15 |     data_web:Optional[Dict[str,str]]=None
 16 |     data_web_data:Optional[Dict[str,str]]=None
 17 |     
 18 |     @property
 19 |     def languages(self)->List[str]:
 20 |         if len(self.content) == 0:
 21 |             return list(self.titles.keys())
 22 |         return list(self.content.keys())
 23 |     
 24 |     
 25 | def get_namespaces(xml_file:Union[str, os.PathLike])->Dict[str,str]:
 26 |     """
 27 |     Extracts the namespaces from a schema st4 xml file
 28 |     """
 29 |     namespaces = {}
 30 |     for event, elem in ET.iterparse(xml_file, events=("start", "start-ns")):
 31 |         if event == "start-ns":
 32 |             prefix, url = elem
 33 |             namespaces[prefix] = url
 34 |     return namespaces
 35 | 
 36 | def parse(xml_file:Union[str, os.PathLike])->List[St4Entry]:
 37 |     """
 38 |     Parses a schema st4 xml file and returns a list of St4Entry objects
 39 |     """
 40 |     namespaces = get_namespaces(xml_file)
 41 |     assert "n" in namespaces and  "l" in namespaces , "No namespaces found! Is this a valid ST4 file?"
 42 |      
 43 |     extracted_entries=[]
 44 |     
 45 |     def extract_language_and_values(element:ET.Element,with_entry=False)->Dict[str,str]:
 46 |         extracted={}
 47 |         value_elements = element.findall("./n:Value",namespaces)
 48 |         for value_element in value_elements:
 49 |             language = value_element.attrib[(f"{'{'+namespaces['n']+'}'}Aspect")]
 50 |             if with_entry:
 51 |                 entry_element = value_element.find(".//n:Entry",namespaces)
 52 |                 if entry_element is not None:
 53 |                     extracted[language]=entry_element.text
 54 |             else:
 55 |                 extracted[language]=value_element.text         
 56 |         return extracted
 57 |             
 58 |             
 59 |     tree = ET.parse(xml_file)
 60 |     root = tree.getroot()
 61 |     
 62 |     # Find all 'n:SystemFolder' elements
 63 |     system_folder_elements = root.findall(".//n:SystemFolder",namespaces)
 64 |     for system_folder_element in system_folder_elements:
 65 |         
 66 |         #get info elements
 67 |         
 68 |         info_elements = system_folder_element.findall(".//n:Data-Title/..",namespaces) #Just dont ask me why, but im not gonna hardcode the InfoType02 element 
 69 |         if info_elements is None:
 70 |             continue
 71 |         
 72 |         
 73 |         for info_element in info_elements:
 74 |             #extract label and ids
 75 |             type=info_element.tag
 76 |             label  = info_element.attrib[(f"{'{'+namespaces['l']+'}'}Label")]
 77 |             node_id =  info_element.attrib[(f"{'{'+namespaces['n']+'}'}Id")]
 78 |             link_id  = info_element.attrib[(f"{'{'+namespaces['l']+'}'}Id")]
 79 |             
 80 |             #extract the titles in all languages
 81 |             title_element = info_element.find(".//n:Data-Title",namespaces)
 82 |             titles=extract_language_and_values(title_element,with_entry=True)
 83 |             
 84 |             #get the content in all languages
 85 |             data_content_element = info_element.find(".//n:Data-Content",namespaces)
 86 |             content={}
 87 |             if data_content_element is not None:
 88 |                 value_elements = data_content_element.findall("./n:Value",namespaces)
 89 |                 
 90 |                 for value_element in value_elements:
 91 |                     language = value_element.attrib[(f"{'{'+namespaces['n']+'}'}Aspect")]
 92 |                     content_element = value_element.find(".//n:Entry//content",namespaces)
 93 |                     content[language]= ET.tostring(content_element, encoding='unicode')
 94 |              
 95 |             #check if we got content or titles, if not, skip this entry         
 96 |             if len(titles)==0 and len(content)==0:
 97 |                 continue
 98 |                        
 99 |             #get thumbnail if it exists
100 |             thumbnail=None
101 |             thumbnail_element = info_element.find(".//n:Data-Thumbnail",namespaces)
102 |             if thumbnail_element is not None:
103 |                 thumbnail = thumbnail_element.text
104 |                 
105 |             #get data web if it exists
106 |             data_web = None
107 |             data_web_element = info_element.find(".//n:Data-Web",namespaces)
108 |             if data_web_element is not None:
109 |                 data_web = extract_language_and_values(data_web_element)
110 |                 
111 |             # get data web.data if it exists // dont ask me why it is named this way, its just stupid
112 |             data_web_data = None
113 |             data_web_data_element = info_element.find(".//n:Data-Web.Data",namespaces)
114 |             if data_web_data_element is not None:
115 |                 data_web_data = extract_language_and_values(data_web_data_element)
116 |             
117 |             extracted_entries.append(St4Entry(type,label,node_id,link_id,titles,content,thumbnail,data_web,data_web_data))
118 |                 
119 |     return extracted_entries
120 |     
121 |     
122 |     
123 |     
124 |     
125 |     
126 | 


--------------------------------------------------------------------------------