├── imgquad
    ├── __init__.py
    ├── __main__.py
    ├── shared.py
    ├── profiles
    │   ├── mh-2025-tiff-300.xml
    │   └── mh-2025-tiff-600.xml
    ├── schemas
    │   ├── mh-2025-tiff-300.sch
    │   └── mh-2025-tiff-600.sch
    ├── jpegquality.py
    ├── schematron.py
    ├── properties.py
    └── imgquad.py
├── cli.py
├── package-pypi.sh
├── .gitignore
├── setup.py
├── LICENSE
└── README.md


/imgquad/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/imgquad/__main__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | 
3 | 
4 | """imgquad.__main__: executed when imgquad directory is called as script."""
5 | 
6 | 
7 | from .imgquad import main
8 | main()
9 | 


--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | #
3 | """CLI wrapper script, ensures that relative imports work correctly in a PyInstaller build"""
4 | 
5 | from imgquad.imgquad import main
6 | 
# Only start the tool when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()
9 | 


--------------------------------------------------------------------------------
/imgquad/shared.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | 
 3 | """Image Quality Assessment for Digitisation batches
 4 | 
 5 | Johan van der Knijff
 6 | 
 7 | Copyright 2024, KB/National Library of the Netherlands
 8 | 
 9 | Module with shared functions
10 | 
11 | """
12 | 
13 | import sys
14 | import os
15 | 
def errorExit(msg):
    """Write error message to stderr and terminate with a failure status

    msg: message text; it is written to stderr as "ERROR: <msg>",
    followed by a newline.

    Raises SystemExit with exit code 1, so that calling shells and scripts
    can detect that the run failed (a bare sys.exit() would report exit
    status 0, which signals success).
    """
    msgString = "ERROR: {}\n".format(msg)
    sys.stderr.write(msgString)
    sys.exit(1)
21 | 
22 | 
def checkFileExists(fileIn):
    """Verify that fileIn is an existing file; report error and exit if not"""
    if os.path.isfile(fileIn):
        # Nothing to report
        return
    errorExit("file {} does not exist".format(fileIn))
28 | 
29 | 
def checkDirExists(pathIn):
    """Verify that pathIn is an existing directory; report error and exit if not"""
    if os.path.isdir(pathIn):
        # Nothing to report
        return
    errorExit("directory {} does not exist".format(pathIn))
35 | 


--------------------------------------------------------------------------------
/package-pypi.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script creates a wheel distribution and uploads it to PyPi
 4 | # 
 5 | # Requirements:
 6 | #
 7 | # twine https://pypi.python.org/pypi/twine/1.9.1 (pip install twine)
 8 | # wheel https://pypi.python.org/pypi/wheel (pip install wheel)
 9 | 
10 | # Repository: this is usually pypi; for testing use testpypi
11 | # The corresponding repository URLS are defined in config file ~/.pypirc
12 | #repository=testpypi
13 | repository=pypi
14 | 
15 | # Working directory
16 | workDir=$PWD
17 | 
18 | # Dist directory
19 | distDir=$workDir"/dist/"
20 | 
21 | # Clear contents of dist dir if it exists
22 | if [ -d "$distDir" ]; then
23 |     rm -r "$distDir"
24 | fi
25 | 
26 | # Create wheel
27 | python3 setup.py sdist bdist_wheel --universal
28 | 
29 | # Upload package if wheel build was successful; if not show error message
30 | if [ $? -eq 0 ]; then
31 |     twine upload --repository $repository dist/*
32 | else
33 |     echo "Wheel build not successful quitting now ..."
34 | fi
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # If you're thinking of un-ignoring any of these artefacts in a lower
 2 | # level .gitignore please think again. The non-Eclipse / Maven options
 3 | # below are recommended candidates from http://help.github.com/ignore-files/
 4 | #
 5 | # As a general rule please don't commit:
 6 | #    IDE generated files, it upsets the IDEs of others
 7 | #    Compiled / built files (exes, jars, etc.), it's a source repository
 8 | #    Test data larger than a few KB, we'll go for bigger test files in the testbed
 9 | #
10 | # Remember, we'd like to keep the git repo light and small enough for people to 
11 | # download quickly and easily.
12 | #
13 | # Any questions then get in touch:
14 | #
15 | #    Carl Wilson Open Planets Foundation
16 | #    carlwilson@GitHub carl( AT )openplanetsfoundation.org.
17 | 
18 | # Eclipse Files #
19 | #################
20 | .externalToolBuilders
21 | .settings
22 | .classpath
23 | .project
24 | *.md.html
25 | bin
26 | .pydevproject
27 | 
28 | # Netbeans Files #
29 | #################
30 | nbactions.xml
31 | 
32 | # project build directories #
33 | #############################
34 | target
35 | build
36 | dist
37 | pyi-build
38 | 
39 | # Compiled Source #
40 | ###################
41 | *.com
42 | *.class
43 | *.dll
44 | *.exe
45 | *.o
46 | *.so
47 | *.pyc
48 | 
49 | # PyInstaller bits #
50 | ####################
51 | # *.spec
52 | 
53 | # Vagrant bits #
54 | ####################
55 | .vagrant/
56 | 
57 | # Packages #
58 | ############
59 | # Better to unpack and commit the raw source
60 | # git has its own built in compression methods
61 | *.7z
62 | *.dmg
63 | *.gz
64 | *.iso
65 | *.jar
66 | *.rar
67 | *.tar
68 | *.war
69 | *.zip
70 | *.dsc
71 | *.deb
72 | *.changes
73 | *.egg-info
74 | 
75 | # Logs and databases #
76 | ######################
77 | *.log
78 | *.sql
79 | *.sqlite
80 | 
81 | # Vue Backup Files #
82 | ######################
83 | .~*.vue
84 | 
85 | # OS Generated files #
86 | ######################
87 | .DS_Store*
88 | ehthumbs.db
89 | Icon?
90 | Thumbs.db
91 | .directory
92 | 
93 | # Files from gh-pages #
94 | #######################
95 | /_site
96 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Setup script for imgquad"""
 3 | import codecs
 4 | import os
 5 | import re
 6 | from setuptools import setup, find_packages
 7 | 
 8 | def read(*parts):
 9 |     """Read file and return contents"""
10 |     path = os.path.join(os.path.dirname(__file__), *parts)
11 |     with codecs.open(path, encoding='utf-8') as fobj:
12 |         return fobj.read()
13 | 
def find_version(*file_paths):
    """Return the version number that is assigned to __version__ in a file

    file_paths: path components of the file to search; these are passed
    on to read().

    Raises RuntimeError if the file contains no __version__ assignment.
    """
    match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                      read(*file_paths),
                      re.M)
    if match is None:
        raise RuntimeError("Unable to find version string.")
    return match.group(1)
21 | 
# Packages that are needed at run time
INSTALL_REQUIRES = ['setuptools',
                    'lxml',
                    'pillow>=9.0.0']
# Python versions on which the package can be installed
PYTHON_REQUIRES = '>=3.8, <4'

# Version number is parsed once from the main module, and re-used for
# both the package version and the download URL
VERSION = find_version('imgquad', 'imgquad.py')

# Long description is taken from the README. The file is opened with an
# explicit encoding (the platform default is not UTF-8 everywhere), and
# inside a context manager so the handle is closed even if reading fails
with open(os.path.join(os.path.dirname(__file__), 'README.md'),
          'r', encoding='utf-8') as README:
    README_TEXT = README.read()

setup(name='imgquad',
      packages=find_packages(),
      version=VERSION,
      license='Apache License (https://www.apache.org/licenses/LICENSE-2.0)',
      install_requires=INSTALL_REQUIRES,
      python_requires=PYTHON_REQUIRES,
      platforms=['POSIX', 'Windows'],
      description='IMaGe QUality Assessment for Digitisation batches',
      long_description=README_TEXT,
      long_description_content_type='text/markdown',
      author='Johan van der Knijff',
      author_email='johan.vanderknijff@kb.nl',
      maintainer='Johan van der Knijff',
      maintainer_email='johan.vanderknijff@kb.nl',
      url='https://github.com/KBNLresearch/imgquad',
      download_url='https://github.com/KBNLresearch/imgquad/archive/' \
        + VERSION + '.tar.gz',
      # Include the profiles and schemas that are shipped inside the package
      package_data={'imgquad': ['*.*',
                                'profiles/*.*',
                                'schemas/*.*']},
      entry_points={'console_scripts': [
          'imgquad = imgquad.imgquad:main',
      ]},
      classifiers=[
          'Environment :: Console',
          'Programming Language :: Python :: 3',
      ]
     )
59 | 


--------------------------------------------------------------------------------
/imgquad/profiles/mh-2025-tiff-300.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <profile>
 4 | 
 5 | <!-- Middeleeuwse Handschriften, 2025 specs (TIFF, 300 ppi)-->
 6 | 
 7 | <!-- File extensions that will be processed (case insensitive) -->
 8 | <extension>tif</extension>
 9 | <extension>tiff</extension>
10 | 
11 | <!-- Namespace definitions (used in summaryProperty paths)-->
12 | <ns uri="adobe:ns:meta/" prefix="x"/>
13 | <ns uri="http://www.w3.org/1999/02/22-rdf-syntax-ns#" prefix="rdf"/>
14 | <ns uri="http://ns.adobe.com/photoshop/1.0/" prefix="photoshop"/>
15 | 
16 | <!-- Properties that are written to summary file -->
17 | <summaryProperty>properties/image/format</summaryProperty>
18 | <summaryProperty>properties/image/icc_profile_name</summaryProperty>
19 | <summaryProperty>properties/image/tiff/XResolution</summaryProperty>
20 | <summaryProperty>properties/image/tiff/YResolution</summaryProperty>
21 | <summaryProperty>properties/image/tiff/ResolutionUnit</summaryProperty>
22 | <summaryProperty>properties/image/tiff/ImageWidth</summaryProperty>
23 | <summaryProperty>properties/image/tiff/ImageLength</summaryProperty>
24 | <summaryProperty>properties/image/tiff/BitsPerSample</summaryProperty>
25 | <summaryProperty>properties/image/tiff/Copyright</summaryProperty>
26 | <summaryProperty>properties/image/exif/Compression</summaryProperty>
27 | <summaryProperty>properties/image/exif/Software</summaryProperty>
28 | <summaryProperty>properties/image/exif/DateTimeOriginal</summaryProperty>
29 | <summaryProperty>properties/image/exif/Model</summaryProperty>
30 | <summaryProperty>properties/image/exif/Make</summaryProperty>
31 | <summaryProperty>properties/image/exif/ShutterSpeedValue</summaryProperty>
32 | <summaryProperty>properties/image/exif/ApertureValue</summaryProperty>
33 | <summaryProperty>properties/image/exif/ISOSpeedRatings</summaryProperty>
34 | <!-- Below properties can be encoded as either sub-elements or attributes of
35 | an rdf:Description element, so they are duplicated here to cover both cases -->
36 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/photoshop:Headline</summaryProperty>
37 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/photoshop:Credit</summaryProperty>
38 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/@photoshop:Headline</summaryProperty>
39 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/@photoshop:Credit</summaryProperty>
40 | 
41 | <!-- Schematron schema definitions
42 | 
43 | Each "schema"
44 | element links a search pattern to a corresponding schema
45 | 
46 | - Value for "type" is either "fileName" or "parentDirName"
47 | - Value for "match" is either "is", "startswith", "endswith" or "contains"
48 | -->
49 | 
50 | <schema>mh-2025-tiff-300.sch</schema>
51 | 
52 | </profile>
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/imgquad/profiles/mh-2025-tiff-600.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | 
 3 | <profile>
 4 | 
 5 | <!-- Middeleeuwse Handschriften, 2025 specs (TIFF, 600 ppi)-->
 6 | 
 7 | <!-- File extensions that will be processed (case insensitive) -->
 8 | <extension>tif</extension>
 9 | <extension>tiff</extension>
10 | 
11 | <!-- Namespace definitions (used in summaryProperty paths)-->
12 | <ns uri="adobe:ns:meta/" prefix="x"/>
13 | <ns uri="http://www.w3.org/1999/02/22-rdf-syntax-ns#" prefix="rdf"/>
14 | <ns uri="http://ns.adobe.com/photoshop/1.0/" prefix="photoshop"/>
15 | 
16 | <!-- Properties that are written to summary file -->
17 | <summaryProperty>properties/image/format</summaryProperty>
18 | <summaryProperty>properties/image/icc_profile_name</summaryProperty>
19 | <summaryProperty>properties/image/tiff/XResolution</summaryProperty>
20 | <summaryProperty>properties/image/tiff/YResolution</summaryProperty>
21 | <summaryProperty>properties/image/tiff/ResolutionUnit</summaryProperty>
22 | <summaryProperty>properties/image/tiff/ImageWidth</summaryProperty>
23 | <summaryProperty>properties/image/tiff/ImageLength</summaryProperty>
24 | <summaryProperty>properties/image/tiff/BitsPerSample</summaryProperty>
25 | <summaryProperty>properties/image/tiff/Copyright</summaryProperty>
26 | <summaryProperty>properties/image/exif/Compression</summaryProperty>
27 | <summaryProperty>properties/image/exif/Software</summaryProperty>
28 | <summaryProperty>properties/image/exif/DateTimeOriginal</summaryProperty>
29 | <summaryProperty>properties/image/exif/Model</summaryProperty>
30 | <summaryProperty>properties/image/exif/Make</summaryProperty>
31 | <summaryProperty>properties/image/exif/ShutterSpeedValue</summaryProperty>
32 | <summaryProperty>properties/image/exif/ApertureValue</summaryProperty>
33 | <summaryProperty>properties/image/exif/ISOSpeedRatings</summaryProperty>
34 | <!-- Below properties can be encoded as either sub-elements or attributes of
35 | an rdf:Description element, so they are duplicated here to cover both cases -->
36 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/photoshop:Headline</summaryProperty>
37 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/photoshop:Credit</summaryProperty>
38 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/@photoshop:Headline</summaryProperty>
39 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/@photoshop:Credit</summaryProperty>
40 | 
41 | <!-- Schematron schema definitions
42 | 
43 | Each "schema"
44 | element links a search pattern to a corresponding schema
45 | 
46 | - Value for "type" is either "fileName" or "parentDirName"
47 | - Value for "match" is either "is", "startswith", "endswith" or "contains"
48 | -->
49 | 
50 | <schema>mh-2025-tiff-600.sch</schema>
51 | 
52 | </profile>
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/imgquad/schemas/mh-2025-tiff-300.sch:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!-- Schematron rules for Middeleeuwse Handschriften, 2025 specs, TIFF, 300 ppi -->
 3 | 
 4 | <s:schema xmlns:s="http://purl.oclc.org/dsdl/schematron">
 5 | <s:ns uri="adobe:ns:meta/" prefix="x"/>
 6 | <s:ns uri="http://www.w3.org/1999/02/22-rdf-syntax-ns#" prefix="rdf"/>
 7 | <s:ns uri="http://ns.adobe.com/photoshop/1.0/" prefix="photoshop"/>
 8 | 
 9 | <s:pattern>
10 |     <s:title>Middeleeuwse Handschriften, 2025 checks</s:title>
11 | 
12 |     <!-- Checks at image level -->
13 |     <s:rule context="//properties/image">
14 |         <!-- Check on image format -->
15 |         <s:assert test="(format = 'TIFF')">Unexpected image format (expected: TIFF)</s:assert>
16 |         <!-- Check on ICC profile name -->
17 |         <s:assert test="(icc_profile_name = 'eciRGB v2')">Unexpected ICC profile name</s:assert>
18 |     </s:rule>
19 | 
20 |     <!-- Checks at tiff tag level -->
21 |     <s:rule context="//properties/image/tiff">
22 |         <!-- Checks for X- and Y resolution tags -->
23 |         <s:assert test="(count(XResolution) &gt; 0)">Missing XResolution tag</s:assert>
24 |         <s:assert test="(count(YResolution) &gt; 0)">Missing YResolution tag</s:assert>
25 |         <s:assert test="(XResolution &gt; 299) and
26 |         (XResolution &lt; 301)">XResolution value outside permitted range</s:assert>
27 |         <s:assert test="(YResolution &gt; 299) and
28 |         (YResolution &lt; 301)">YResolution value outside permitted range</s:assert>
29 |         <s:assert test="(count(ResolutionUnit) &gt; 0)">Missing ResolutionUnit tag</s:assert>
30 |         <s:assert test="(ResolutionUnit = 2)">Wrong ResolutionUnit value</s:assert>
31 |         <!-- Following tags are ALWAYS present, so not sure if checks make sense --> 
32 |         <s:assert test="(count(ImageWidth) &gt; 0)">Missing ImageWidth tag</s:assert>
33 |         <s:assert test="(count(ImageLength) &gt; 0)">Missing ImageLength tag</s:assert>
34 |         <!-- Checks on BitsPerSample -->
35 |         <s:assert test="(count(BitsPerSample) &gt; 0)">Missing BitsPerSample tag</s:assert>
36 |         <s:assert test="(BitsPerSample = '8 8 8')">Wrong BitsPerSample value</s:assert>
37 |         <!-- Check on ICCProfile tag -->
38 |         <s:assert test="(count(ICCProfile) &gt; 0)">Missing ICCProfile tag</s:assert>
39 |         <!-- Check on Copyright tag -->
40 |         <s:assert test="(count(Copyright) &gt; 0)">Missing Copyright tag</s:assert>
41 |         <!-- Check image contains no more than 1 NewSubFileType tag -->
42 |         <s:assert test="(count(NewSubfileType) &lt; 2)">Multiple NewSubfileType tags</s:assert>
43 |         <!-- Check image doesn't contain SubIFDs tag -->
44 |         <s:assert test="(count(SubIFDs) = 0)">SubIFDs tag is not allowed</s:assert>
45 |     </s:rule>
46 | 
47 |     <!-- Checks at exif tag level -->
48 |     <s:rule context="//properties/image/exif">
49 |          <!-- Compression type checks -->
50 |          <s:assert test="(count(Compression) &gt; 0)">Missing Compression tag</s:assert>
51 |         <s:assert test="(Compression = 1)">Unexpected Compression value</s:assert>
52 |         <!-- Checks for capture and camera related tags -->
53 |         <s:assert test="(count(Software) &gt; 0)">Missing Software tag</s:assert>
54 |         <s:assert test="(Software != '')">Empty Software tag</s:assert>
55 |         <s:assert test="(count(DateTimeOriginal) &gt; 0)">Missing DateTimeOriginal tag</s:assert>
56 |         <s:assert test="(DateTimeOriginal != '')">Empty DateTimeOriginal tag</s:assert>
57 |         <s:assert test="(count(Model) &gt; 0)">Missing Model tag</s:assert>
58 |         <s:assert test="(Model != '')">Empty Model tag</s:assert>
59 |         <s:assert test="(count(Make) &gt; 0)">Missing Make tag</s:assert>
60 |         <s:assert test="(Make != '')">Empty Make tag</s:assert>
61 |         <s:assert test="(count(ShutterSpeedValue) &gt; 0)">Missing ShutterSpeedValue tag</s:assert>
62 |         <s:assert test="(ShutterSpeedValue != '')">Empty ShutterSpeedValue tag</s:assert>
63 |         <s:assert test="(count(ApertureValue) &gt; 0)">Missing ApertureValue tag</s:assert>
64 |         <s:assert test="(ApertureValue != '')">Empty ApertureValue tag</s:assert>
65 |         <s:assert test="(count(ISOSpeedRatings) &gt; 0)">Missing ISOSpeedRatings tag</s:assert>
66 |         <s:assert test="(ISOSpeedRatings != '')">Empty ISOSpeedRatings tag</s:assert>
67 |     </s:rule>
68 | 
69 |     <!-- Checks for descriptive metadata in XMP -->
70 |     <s:rule context="//properties/image/x:xmpmeta">
71 |         <!-- Checks on Headline and Credit elements. These can be defined as either dedicated sub-elements of rdf:Description,
72 |         or as attributes of rdf:Description, so we need to check for both -->
73 |         <s:assert test="(count(rdf:RDF/rdf:Description/photoshop:Headline) &gt; 0 or count(rdf:RDF/rdf:Description/@photoshop:Headline) &gt; 0)">Missing Headline element</s:assert>
74 |         <s:assert test="((rdf:RDF/rdf:Description/photoshop:Headline != '') or (rdf:RDF/rdf:Description/@photoshop:Headline != ''))">Empty Headline element</s:assert>
75 |         <s:assert test="(count(rdf:RDF/rdf:Description/photoshop:Credit) &gt; 0 or count(rdf:RDF/rdf:Description/@photoshop:Credit) &gt; 0)">Missing Credit element</s:assert>
76 |         <s:assert test="((rdf:RDF/rdf:Description/photoshop:Credit != '') or (rdf:RDF/rdf:Description/@photoshop:Credit != ''))">Empty Credit element</s:assert>
77 |     </s:rule>
78 | 
79 |     <!-- Check for exceptions -->
80 |     <s:rule context="//properties/image/exceptions">
81 |         <!-- Check on absence of any exceptions while parsing the image -->
82 |         <s:assert test="(count(exception) = 0)">Properties extraction at image level resulted in one or more exceptions</s:assert>
83 |     </s:rule>
84 | 
85 | </s:pattern>
86 | </s:schema>
87 | 


--------------------------------------------------------------------------------
/imgquad/schemas/mh-2025-tiff-600.sch:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!-- Schematron rules for Middeleeuwse Handschriften, 2025 specs, TIFF, 600 ppi -->
 3 | 
 4 | <s:schema xmlns:s="http://purl.oclc.org/dsdl/schematron">
 5 | <s:ns uri="adobe:ns:meta/" prefix="x"/>
 6 | <s:ns uri="http://www.w3.org/1999/02/22-rdf-syntax-ns#" prefix="rdf"/>
 7 | <s:ns uri="http://ns.adobe.com/photoshop/1.0/" prefix="photoshop"/>
 8 | 
 9 | <s:pattern>
10 |     <s:title>Middeleeuwse Handschriften, 2025 checks</s:title>
11 | 
12 |     <!-- Checks at image level -->
13 |     <s:rule context="//properties/image">
14 |         <!-- Check on image format -->
15 |         <s:assert test="(format = 'TIFF')">Unexpected image format (expected: TIFF)</s:assert>
16 |         <!-- Check on ICC profile name -->
17 |         <s:assert test="(icc_profile_name = 'eciRGB v2')">Unexpected ICC profile name</s:assert>
18 |     </s:rule>
19 | 
20 |     <!-- Checks at tiff tag level -->
21 |     <s:rule context="//properties/image/tiff">
22 |         <!-- Checks for X- and Y resolution tags -->
23 |         <s:assert test="(count(XResolution) &gt; 0)">Missing XResolution tag</s:assert>
24 |         <s:assert test="(count(YResolution) &gt; 0)">Missing YResolution tag</s:assert>
25 |         <s:assert test="(XResolution &gt; 599) and
26 |         (XResolution &lt; 601)">XResolution value outside permitted range</s:assert>
27 |         <s:assert test="(YResolution &gt; 599) and
28 |         (YResolution &lt; 601)">YResolution value outside permitted range</s:assert>
29 |         <s:assert test="(count(ResolutionUnit) &gt; 0)">Missing ResolutionUnit tag</s:assert>
30 |         <s:assert test="(ResolutionUnit = 2)">Wrong ResolutionUnit value</s:assert>
31 |         <!-- Following tags are ALWAYS present, so not sure if checks make sense --> 
32 |         <s:assert test="(count(ImageWidth) &gt; 0)">Missing ImageWidth tag</s:assert>
33 |         <s:assert test="(count(ImageLength) &gt; 0)">Missing ImageLength tag</s:assert>
34 |         <!-- Checks on BitsPerSample -->
35 |         <s:assert test="(count(BitsPerSample) &gt; 0)">Missing BitsPerSample tag</s:assert>
36 |         <s:assert test="(BitsPerSample = '8 8 8')">Wrong BitsPerSample value</s:assert>
37 |         <!-- Check on ICCProfile tag -->
38 |         <s:assert test="(count(ICCProfile) &gt; 0)">Missing ICCProfile tag</s:assert>
39 |         <!-- Check on Copyright tag -->
40 |         <s:assert test="(count(Copyright) &gt; 0)">Missing Copyright tag</s:assert>
41 |         <!-- Check image contains no more than 1 NewSubFileType tag -->
42 |         <s:assert test="(count(NewSubfileType) &lt; 2)">Multiple NewSubfileType tags</s:assert>
43 |         <!-- Check image doesn't contain SubIFDs tag -->
44 |         <s:assert test="(count(SubIFDs) = 0)">SubIFDs tag is not allowed</s:assert>
45 | 
46 |     </s:rule>
47 | 
48 |     <!-- Checks at exif tag level -->
49 |     <s:rule context="//properties/image/exif">
50 |          <!-- Compression type checks -->
51 |          <s:assert test="(count(Compression) &gt; 0)">Missing Compression tag</s:assert>
52 |         <s:assert test="(Compression = 1)">Unexpected Compression value</s:assert>
53 |         <!-- Checks for capture and camera related tags -->
54 |         <s:assert test="(count(Software) &gt; 0)">Missing Software tag</s:assert>
55 |         <s:assert test="(Software != '')">Empty Software tag</s:assert>
56 |         <s:assert test="(count(DateTimeOriginal) &gt; 0)">Missing DateTimeOriginal tag</s:assert>
57 |         <s:assert test="(DateTimeOriginal != '')">Empty DateTimeOriginal tag</s:assert>
58 |         <s:assert test="(count(Model) &gt; 0)">Missing Model tag</s:assert>
59 |         <s:assert test="(Model != '')">Empty Model tag</s:assert>
60 |         <s:assert test="(count(Make) &gt; 0)">Missing Make tag</s:assert>
61 |         <s:assert test="(Make != '')">Empty Make tag</s:assert>
62 |         <s:assert test="(count(ShutterSpeedValue) &gt; 0)">Missing ShutterSpeedValue tag</s:assert>
63 |         <s:assert test="(ShutterSpeedValue != '')">Empty ShutterSpeedValue tag</s:assert>
64 |         <s:assert test="(count(ApertureValue) &gt; 0)">Missing ApertureValue tag</s:assert>
65 |         <s:assert test="(ApertureValue != '')">Empty ApertureValue tag</s:assert>
66 |         <s:assert test="(count(ISOSpeedRatings) &gt; 0)">Missing ISOSpeedRatings tag</s:assert>
67 |         <s:assert test="(ISOSpeedRatings != '')">Empty ISOSpeedRatings tag</s:assert>
68 |     </s:rule>
69 | 
70 |     <!-- Checks for descriptive metadata in XMP -->
71 |     <s:rule context="//properties/image/x:xmpmeta">
72 |         <!-- Checks on Headline and Credit elements. These can be defined as either dedicated sub-elements of rdf:Description,
73 |         or as attributes of rdf:Description, so we need to check for both -->
74 |         <s:assert test="(count(rdf:RDF/rdf:Description/photoshop:Headline) &gt; 0 or count(rdf:RDF/rdf:Description/@photoshop:Headline) &gt; 0)">Missing Headline element</s:assert>
75 |         <s:assert test="((rdf:RDF/rdf:Description/photoshop:Headline != '') or (rdf:RDF/rdf:Description/@photoshop:Headline != ''))">Empty Headline element</s:assert>
76 |         <s:assert test="(count(rdf:RDF/rdf:Description/photoshop:Credit) &gt; 0 or count(rdf:RDF/rdf:Description/@photoshop:Credit) &gt; 0)">Missing Credit element</s:assert>
77 |         <s:assert test="((rdf:RDF/rdf:Description/photoshop:Credit != '') or (rdf:RDF/rdf:Description/@photoshop:Credit != ''))">Empty Credit element</s:assert>
78 |     </s:rule>
79 | 
80 |     <!-- Check for exceptions -->
81 |     <s:rule context="//properties/image/exceptions">
82 |         <!-- Check on absence of any exceptions while parsing the image -->
83 |         <s:assert test="(count(exception) = 0)">Properties extraction at image level resulted in one or more exceptions</s:assert>
84 |     </s:rule>
85 | 
86 | </s:pattern>
87 | </s:schema>
88 | 


--------------------------------------------------------------------------------
/imgquad/jpegquality.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | """
  4 | JPEG quality least squares matching demo.
  5 | 
  6 | Johan van der Knijff, KB National Library of the Netherlands, 2024.
  7 | 
  8 | See also:
  9 | 
 10 | https://www.bitsgalore.org/2024/10/30/jpeg-quality-estimation-using-simple-least-squares-matching-of-quantization-tables
 11 | 
 12 | """
 13 | import math
 14 | import argparse
 15 | from PIL import Image
 16 | 
 17 | def parseCommandLine():
 18 |     """Parse command line"""
 19 |     parser = argparse.ArgumentParser()
 20 |     parser.add_argument('JPEGsIn',
 21 |                         action="store",
 22 |                         type=str,
 23 |                         nargs='+',
 24 |                         help="input JPEG(s) (wildcards allowed)")
 25 | 
 26 |     # Parse arguments
 27 |     args = parser.parse_args()
 28 | 
 29 |     return args
 30 | 
 31 | 
 32 | def computeJPEGQuality(image):
 33 |     """Estimates JPEG quality using least squares matching between image
 34 |     quantization tables and standard tables from the JPEG ISO standard.
 35 |     
 36 |     This compares the image quantization tables against the standard quantization
 37 |     tables for *all* possible quality levels, which are generated using
 38 |     Equations 1 and 2 in Kornblum (2008):
 39 | 
 40 |     https://www.sciencedirect.com/science/article/pii/S1742287608000285
 41 | 
 42 |     Returns quality estimate, root mean squared error of residuals between
 43 |     image quantization coefficients and corresponding standard coefficients,
 44 |     and Nash-Sutcliffe Efficiency measure.
 45 |     """
 46 | 
 47 |     # Standard JPEG luminance and chrominance quantization tables
 48 |     # for 50% quality (ISO/IEC 10918-1 : 1993(E)), Annex K)
 49 |     lum_base = [16, 11, 10, 16, 24, 40, 51, 61,
 50 |                 12, 12, 14, 19, 26, 58, 60, 55,
 51 |                 14, 13, 16, 24, 40, 57, 69, 56,
 52 |                 14, 17, 22, 29, 51, 87, 80, 62,
 53 |                 18, 22, 37, 56, 68, 109, 103, 77,
 54 |                 24, 35, 55, 64, 81, 104, 113, 92,
 55 |                 49, 64, 78, 87, 103, 121, 120, 101,
 56 |                 72, 92, 95, 98, 112, 100, 103, 99]
 57 | 
 58 |     chrom_base = [17, 18, 24, 47, 99, 99, 99, 99,
 59 |                   18, 21, 26, 66, 99, 99, 99, 99,
 60 |                   24, 26, 56, 99, 99, 99, 99, 99,
 61 |                   47, 66, 99, 99, 99, 99, 99, 99,
 62 |                   99, 99, 99, 99, 99, 99, 99, 99,
 63 |                   99, 99, 99, 99, 99, 99, 99, 99,
 64 |                   99, 99, 99, 99, 99, 99, 99, 99,
 65 |                   99, 99, 99, 99, 99, 99, 99, 99]
 66 | 
 67 |     # Image quantization tables
 68 |     qdict = image.quantization
 69 |     noTables = len(qdict)
 70 | 
 71 |     # Default quantization table bit depth
 72 |     qBitDepth = 8
 73 | 
 74 |     if max(qdict[0]) > 255:
 75 |         # Any values greater than 255 indicate bir depth 16 
 76 |         qBitDepth = 16
 77 |     if noTables >= 2:
 78 |         if max(qdict[1]) > 255:
 79 |             qBitDepth = 16
 80 | 
 81 |     # Calculate mean of all value in quantization tables
 82 |     Tsum = sum(qdict[0])
 83 |     if noTables >= 2:
 84 |         Tsum += sum(qdict[1])
 85 |     Tmean = Tsum / (noTables*64)
 86 | 
 87 |     # List for storing squared error values
 88 |     errors = []
 89 | 
 90 |     # List for storing Nash–Sutcliffe Efficiency values
 91 |     nseVals = []
 92 | 
 93 |     # Iterate over all quality levels
 94 |     for i in range(100):
 95 |         # Quality level
 96 |         Q = i+1
 97 |         # Scaling factor (Eq 1 in Kornblum, 2008)
 98 |         if Q < 50:
 99 |             S = 5000/Q
100 |         else:
101 |             S = 200 - 2*Q
102 | 
103 |         # Initialize sum of squared differences between image quantization values
104 |         # and corresponding values from standard q tables for this quality level
105 |         sumSqErrors = 0
106 | 
107 |         # Initialize sum of squared differences between image quantization values
108 |         # and mean image quantization value (needed to calculate Nash Efficiency)
109 |         sumSqMean = 0
110 | 
111 |         # Iterate over all values in quantization tables for this quality
112 |         for j in range(64):
113 |             # Compute standard luminance table value from scaling factor
114 |             # (Eq 2 in Kornblum, 2008)
115 |             Tslum = max(math.floor((S*lum_base[j] + 50) / 100), 1)
116 |             # Cap Tslum at 255 if bit depth is 8
117 |             if qBitDepth == 8:
118 |                 Tslum = min(Tslum, 255)
119 |             # Update sum of squared errors relative to corresponding
120 |             # image table value
121 |             sumSqErrors += (qdict[0][j] - Tslum)**2
122 | 
123 |             # Sum of luminance and chrominance values          
124 |             Tcombi = qdict[0][j]
125 | 
126 |             if noTables >= 2:
127 |                 # Compute standard chrominance table value from scaling factor
128 |                 # (Eq 2 in Kornblum, 2008)
129 |                 Tschrom = max(math.floor((S*chrom_base[j] + 50) / 100), 1)
130 |                 # Cap Tschrom at 255 if bit depth is 8
131 |                 if qBitDepth == 8:
132 |                     Tschrom = min(Tschrom, 255)
133 |                 # Update sum of squared errors relative to corresponding
134 |                 # image table value
135 |                 sumSqErrors  += (qdict[1][j] - Tschrom)**2
136 | 
137 |                 # Update sum of luminance and chrominance values
138 |                 Tcombi += qdict[1][j]
139 | 
140 |             # Update sumSqMMean
141 |             sumSqMean += (Tcombi - Tmean)**2
142 | 
143 |             j += 1
144 | 
145 |         # Calculate Nash-Sutcliffe Effiency
146 |         nse = 1 - sumSqErrors/sumSqMean
147 | 
148 |         # Add calculated statistics to lists
149 |         errors.append(sumSqErrors)
150 |         nseVals.append(nse)
151 | 
152 |     # Quality is estimated as level with smallest sum of squared errors
153 |     # Note that this will return the smallest quality level in case
154 |     # the smallest SSE occurs for more than one level!
155 |     # TODO: perhaps add a check for this and report as output?
156 |     qualityEst = errors.index(min(errors)) + 1
157 |     # Corresponding SSE. Value 0 indicates exact match with standard JPEG
158 |     # quantization tables. Any other value means non-standard tables were
159 |     # used, and quality estimate is an approximation
160 |     sumSqErrors = min(errors)
161 |     # Compute corresponding root mean squared error
162 |     rmsError = round(math.sqrt(sumSqErrors / (noTables * 64)), 3)
163 |     nse = round(max(nseVals), 3)
164 |     return qualityEst, rmsError, nse
165 | 
166 | 
def main():
    """Command-line entry point: print estimated JPEG quality per input file

    The input file list is taken from the JPEGsIn attribute of the parsed
    command line and is sorted in place before processing. For each file
    the quality level, RMS error and Nash-Sutcliffe Efficiency returned by
    computeJPEGQuality are printed to stdout.
    """
    cliArgs = parseCommandLine()
    jpegPaths = cliArgs.JPEGsIn
    jpegPaths.sort()

    for jpegPath in jpegPaths:
        with open(jpegPath, 'rb') as stream:
            image = Image.open(stream)
            # Force decoding while the underlying file is still open
            image.load()
            print("*** Image: {}".format(jpegPath))
            estimates = computeJPEGQuality(image)
            quality, rmsError, nse = estimates
            summary = "quality: {}, RMS Error: {}, NSE: {}".format(quality, rmsError, nse)
            print(summary)
180 | 
# Only run the command-line interface when this module is executed as a
# script (not when it is imported by another module)
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/imgquad/schematron.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | """PDF Quality Assessment for Digitisation batches
  4 | 
  5 | Johan van der Knijff
  6 | 
  7 | Copyright 2024, KB/National Library of the Netherlands
  8 | 
  9 | Module with code related to schematron, schemas and profiles
 10 | 
 11 | """
 12 | 
 13 | import sys
 14 | import os
 15 | import logging
 16 | from lxml import isoschematron
 17 | from lxml import etree
 18 | from . import shared
 19 | 
 20 | 
 21 | def listProfilesSchemas(profilesDir, schemasDir):
 22 |     """List all available profiles and schemas"""
 23 |     profiles = os.listdir(profilesDir)
 24 |     print("Available profiles (directory {}):".format(profilesDir))
 25 |     for profile in profiles:
 26 |         print("  - {}".format(profile))
 27 |     schemas = os.listdir(schemasDir)
 28 |     print("Available schemas (directory {}):".format(schemasDir))
 29 |     for schema in schemas:
 30 |         print("  - {}".format(schema))
 31 |     sys.exit()
 32 | 
 33 | 
 34 | def checkProfilesSchemas(profilesDir, schemasDir):
 35 |     """Check if all profiles and schemas can be read without
 36 |     throwing parse errors"""
 37 |     profiles = os.listdir(profilesDir)
 38 |     for profile in profiles:
 39 |         try:
 40 |             readAsLXMLElt(os.path.join(profilesDir, profile))
 41 |         except Exception:
 42 |             msg = ("error parsing profile {}").format(profile)
 43 |             shared.errorExit(msg)
 44 |     schemas = os.listdir(schemasDir)
 45 |     for schema in schemas:
 46 |         try:
 47 |             schemaElt = readAsLXMLElt(os.path.join(schemasDir, schema))
 48 |         except Exception:
 49 |             msg = ("error parsing schema {}").format(schema)
 50 |             raise
 51 |             shared.errorExit(msg)
 52 |         try:
 53 |             isoschematron.Schematron(schemaElt)
 54 |         except etree.XSLTParseError:
 55 |             msg = ("XSLT parse error for schema {}").format(schema)
 56 |             raise
 57 |             shared.errorExit(msg)       
 58 | 
 59 | 
 60 | def readProfile(profile, schemasDir):
 61 |     """Read a profile and returns list with for each schema
 62 |     element the corresponding type, matching method, matching
 63 |     pattern and schematronj file"""
 64 | 
 65 |     # Parse XML tree
 66 |     try:
 67 |         tree = etree.parse(profile)
 68 |         prof = tree.getroot()
 69 |     except Exception:
 70 |         msg = "error parsing {}".format(profile)
 71 |         shared.errorExit(msg)
 72 | 
 73 |     # Output extensions list
 74 |     listExtensions = []
 75 | 
 76 |     # Output namespaces dictionary
 77 |     dictNamespaces = {}
 78 | 
 79 |     # Output properties list
 80 |     listProperties = []
 81 | 
 82 |     # Output schemas list
 83 |     listSchemas = []
 84 | 
 85 |     # Locate extension elements
 86 |     extensions = prof.findall("extension")
 87 | 
 88 |     # Add extensions to output list
 89 |     for extension in extensions:
 90 |         listExtensions.append(extension.text)
 91 | 
 92 |     # Locate namespace elements
 93 |     namespaces = prof.findall("ns")
 94 | 
 95 |     # Add namespace prefixes and uris to dictionary
 96 |     for namespace in namespaces:
 97 |         uri = namespace.attrib['uri']
 98 |         prefix = namespace.attrib['prefix']
 99 |         dictNamespaces[prefix] = uri
100 | 
101 |     # Locate summary properties elements and add them to list
102 |     sProperties = prof.findall("summaryProperty")
103 | 
104 |     for property in sProperties:
105 |         listProperties.append(property.text)
106 | 
107 |     # Flag that indicates use of "type" attribute
108 |     hasType = True
109 | 
110 |     # Locate schema elements
111 |     schemas = prof.findall("schema")
112 | 
113 |     # Add schemas to output list
114 |     for schema in schemas:
115 |         try:
116 |             mType = schema.attrib["type"]
117 |             if mType not in ["fileName", "parentDirName"]:
118 |                 msg = "'{}' is not a valid 'type' value".format(mType)
119 |                 shared.errorExit(msg)
120 |         except KeyError:
121 |             hasType = False
122 | 
123 |         if hasType:
124 |             try:
125 |                 mMatch = schema.attrib["match"]
126 |                 if mMatch not in ["is", "startswith", "endswith", "contains"]:
127 |                     msg = "'{}' is not a valid 'match' value".format(mMatch)
128 |                     shared.errorExit(msg)
129 |             except KeyError:
130 |                 msg = "missing 'match' attribute in profile {}".format(profile)
131 |                 shared.errorExit(msg)
132 |             try:
133 |                 mPattern = schema.attrib["pattern"]
134 |             except KeyError:
135 |                 msg = "missing 'pattern' attribute in profile {}".format(profile)
136 |                 shared.errorExit(msg)
137 |         else:
138 |             mType = None
139 |             mMatch = None
140 |             mPattern = None
141 | 
142 |         schematronFile = os.path.join(schemasDir, schema.text)
143 |         shared.checkFileExists(schematronFile)
144 | 
145 |         listSchemas.append([mType, mMatch, mPattern, schematronFile])
146 | 
147 |     return listExtensions, dictNamespaces, listProperties, listSchemas
148 | 
149 | 
def readAsLXMLElt(xmlFile):
    """Parse XML file with lxml and return the object produced by
    lxml.etree.parse (not the same as a standard library Elementtree
    object!)

    The file is opened as UTF-8 text. Parse errors raised by lxml are
    passed on to the caller.
    """

    # The context manager makes sure the file is closed, also if parsing fails
    with open(xmlFile, 'r', encoding="utf-8") as f:
        # Note we're using lxml.etree here rather than elementtree
        resultAsLXMLElt = etree.parse(f)

    return resultAsLXMLElt
161 | 
162 | 
def summariseSchematron(report):
    """Return summarized version of Schematron report with only output of
    failed tests

    All "fired-rule" elements (SVRL namespace) are removed from report.
    The report is modified in place and also returned.
    """

    tagFiredRule = "{http://purl.oclc.org/dsdl/svrl}fired-rule"

    # Collect the elements first, so the tree is not modified while it is
    # being iterated over
    firedRules = [elem for elem in report.iter() if elem.tag == tagFiredRule]

    for elem in firedRules:
        elem.getparent().remove(elem)

    return report
172 | 
173 | 
def findSchema(PDF, schemas):
    """Find schema based on match with name or parent directory

    Each item of schemas is read by index as type, matching method,
    matching pattern and schema reference. An item whose type is None
    always matches. Otherwise the pattern is tested against the file name
    ("fileName") or the name of the parent directory ("parentDirName") of
    PDF, using the method "is", "startswith", "endswith" or "contains".

    Returns a flag that indicates whether any item matched, and the schema
    reference of the last matching item ("undefined" if nothing matched).
    """

    parentPath, baseName = os.path.split(PDF)
    dirName = os.path.basename(parentPath)

    # Text that is tested for each supported "type" value
    subjects = {"parentDirName": dirName, "fileName": baseName}

    # Test that is applied for each supported "match" value
    tests = {
        "is": lambda text, pattern: text == pattern,
        "startswith": lambda text, pattern: text.startswith(pattern),
        "endswith": lambda text, pattern: text.endswith(pattern),
        "contains": lambda text, pattern: pattern in text,
    }

    found = False
    result = "undefined"

    for entry in schemas:
        kind, how, pattern, target = entry[0], entry[1], entry[2], entry[3]

        if kind is None:
            # Schema without matching criteria applies to everything
            hit = True
        elif kind in subjects and how in tests:
            hit = tests[how](subjects[kind], pattern)
        else:
            hit = False

        # No early exit: a later matching entry overrides an earlier one
        if hit:
            result = target
            found = True

    return found, result
227 | 
228 | 
def validate(schema, propertiesElt, verboseFlag):
    """Validate extracted properties against schema

    schema is the path to a Schematron file, propertiesElt the element
    that is validated against it. Unless verboseFlag is set, "fired-rule"
    elements are stripped from the report.

    Returns a tuple of:

    - flag that indicates whether validation ran
    - validation outcome ("Pass" or "Fail")
    - "schematronReport" element, which wraps the validation report (it is
      left empty if no report could be produced)

    Errors while reading or compiling the schema are passed on to the
    caller.
    """

    # Initial value of validation outcome
    validationOutcome = "Pass"

    # Initial value of flag that indicates whether validation ran
    validationSuccess = False

    # Validation report, only available if validation ran
    report = None

    # Element used to store validation report
    reportElt = etree.Element("schematronReport")
    # Get schema as lxml.etree element
    mySchemaElt = readAsLXMLElt(schema)
    # Start Schematron magic ...
    schematron = isoschematron.Schematron(mySchemaElt,
                                          store_report=True)

    try:
        # Validate properties element against schema
        validationResult = schematron.validate(propertiesElt)
        # Set status to "Fail" if properties didn't pass validation
        if not validationResult:
            validationOutcome = "Fail"
        report = schematron.validation_report
        validationSuccess = True

    except Exception:
        validationOutcome = "Fail"
        logging.error(("Schematron validation failed for {}").format(schema))

    # Without a successful validation run there is no report to process
    if validationSuccess:
        try:
            # Re-parse Schematron report
            report = etree.fromstring(str(report))
            # Make report less verbose
            if not verboseFlag:
                report = summariseSchematron(report)
            # Add to report element
            reportElt.append(report)
        except Exception as e:
            # Report element is left empty, outcome of validation is kept
            logging.warning(("could not process Schematron report for {}: {}").format(schema, str(e)))

    return validationSuccess, validationOutcome, reportElt
272 | 


--------------------------------------------------------------------------------
/imgquad/properties.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | """Image Quality Assessment for Digitisation batches
  4 | 
  5 | Johan van der Knijff
  6 | 
  7 | Copyright 2025, KB/National Library of the Netherlands
  8 | 
  9 | Image properties extraction module
 10 | 
 11 | """
 12 | import os
 13 | import sys #remove, test only
 14 | import io
 15 | import logging
 16 | import base64
 17 | from lxml import etree
 18 | import PIL
 19 | from PIL import ImageCms
 20 | from PIL.TiffTags import TAGS as TAGS_TIFF
 21 | from PIL.ExifTags import TAGS as TAGS_EXIF, GPSTAGS, IFD
 22 | from . import jpegquality
 23 | 
 24 | def dictionaryToElt(name, dictionary):
 25 |     """Create Element object from dictionary, with recursion"""
 26 |     elt = etree.Element(name)
 27 | 
 28 |     for k, v in dictionary.items():
 29 |         if isinstance(v, dict):
 30 |             child = dictionaryToElt(str(k),v)
 31 |             elt.append(child)
 32 |         else:
 33 |             child = etree.Element(k)
 34 |             child.text = str(v)
 35 |         elt.append(child)
 36 | 
 37 |     return elt
 38 | 
 39 | 
 40 | def getBPC(image):
 41 |     """Return Bits per Component as a function of mode and components values"""
 42 |     mode_to_bpp = {"1": 1,
 43 |                    "L": 8,
 44 |                    "P": 8,
 45 |                    "RGB": 24,
 46 |                    "RGBA": 32,
 47 |                    "CMYK": 32,
 48 |                    "YCbCr": 24,
 49 |                    "LAB": 24,
 50 |                    "HSV": 24,
 51 |                    "I": 32,
 52 |                    "F": 32}
 53 | 
 54 |     bitsPerPixel = mode_to_bpp[image.mode]
 55 |     noComponents = len(image.getbands())
 56 | 
 57 |     if noComponents != 0  and isinstance(bitsPerPixel, int):
 58 |         bpc = int(bitsPerPixel/noComponents)
 59 |     else:
 60 |         bpc = -9999
 61 | 
 62 |     return bpc
 63 | 
 64 | 
 65 | def getProperties(file):
 66 |     """Extract properties and return result as Element object"""
 67 | 
 68 |     # Create element object to store all properties
 69 |     propertiesElt = etree.Element("properties")
 70 | 
 71 |     # Element to store exceptions at file level
 72 |     exceptionsFileElt = etree.Element("exceptions")
 73 | 
 74 |     # Create and fill descriptive elements
 75 |     fPathElt = etree.Element("filePath")
 76 |     fPathElt.text = file
 77 |     fNameElt = etree.Element("fileName")
 78 |     fNameElt.text = os.path.basename(file)
 79 |     fSizeElt = etree.Element("fileSize")
 80 |     fSizeElt.text = str(os.path.getsize(file))
 81 | 
 82 |     # Add to properies element
 83 |     propertiesElt.append(fPathElt)
 84 |     propertiesElt.append(fNameElt)
 85 |     propertiesElt.append(fSizeElt)
 86 | 
 87 |     # Read image
 88 |     try:
 89 |         im = PIL.Image.open(file)
 90 |         im.load()
 91 |         propsImageElt = getImageProperties(im)
 92 |         propertiesElt.append(propsImageElt)
 93 | 
 94 |     except Exception  as e:
 95 |         ex = etree.SubElement(exceptionsFileElt,'exception')
 96 |         ex.text = str(e)
 97 |         propertiesElt.append(exceptionsFileElt)
 98 |         logging.warning(("while opening image: {}").format(str(e)))
 99 |         #raise
100 |         return propertiesElt
101 | 
102 |     return propertiesElt
103 | 
104 | 
def getImageProperties(image):
    """Extract image properties and return result as Element object

    The returned "image" element holds:

    - general properties (format, width, height, mode, components, bpc)
    - for JPEG images, the estimated quality level and the corresponding
      Nash-Sutcliffe Efficiency
    - selected items of the image's info dictionary
    - name and description of the ICC profile, if the image has one
    - for TIFF images, a "tiff" element with a child for each TIFF tag
      that is recognised by Pillow
    - an "exif" element with a child for each Exif tag
    - for TIFF images that contain XMP metadata, the parsed XMP packet
    - an "exceptions" element, with an "exception" child for each problem
      that was caught during the extraction
    """

    # Dictionary for storing image properties
    propsImage = {}
    # Element for storing image-level exceptions
    exceptionsImageElt = etree.Element("exceptions")

    propsImage['format'] = image.format
    width = image.size[0]
    height = image.size[1]
    propsImage['width'] = width
    propsImage['height'] = height
    propsImage['mode'] = image.mode
    noComponents = len(image.getbands())
    propsImage['components'] = noComponents
    bitsPerComponent = getBPC(image)
    propsImage['bpc'] = bitsPerComponent

    if image.format == "JPEG":
        try:
            # Estimate JPEG quality using least squares matching
            # against standard quantization tables
            quality, rmsError, nse = jpegquality.computeJPEGQuality(image)
            propsImage['JPEGQuality'] = quality
            propsImage['NSE_JPEGQuality'] = nse
        except Exception as e:
            ex = etree.SubElement(exceptionsImageElt, 'exception')
            ex.text = str(e)
            logging.warning(("while estimating JPEG quality from image: {}").format(str(e)))

    for key, value in image.info.items():

        if key == 'exif':
            # Skip any exif elements as Exif tags are added later
            pass
        elif key == 'photoshop':
            # Skip photoshop elements, because they tend to be large and I don't know how to
            # properly decode them
            pass
        elif isinstance(value, bytes):
            # Only flag presence of binary data, don't include the data itself
            propsImage[key] = 'bytestream'
        elif key == 'dpi' and isinstance(value, tuple):
            propsImage['ppi_x'] = value[0]
            propsImage['ppi_y'] = value[1]
        elif key == 'jfif_density' and isinstance(value, tuple):
            propsImage['jfif_density_x'] = value[0]
            propsImage['jfif_density_y'] = value[1]
        elif isinstance(value, tuple):
            # Skip any other properties that return tuples
            pass
        else:
            propsImage[key] = value

    # ICC profile name and description
    iccFlag = False
    try:
        icc = image.info['icc_profile']
        iccFlag = True
    except KeyError:
        pass

    if iccFlag:
        try:
            iccProfile = ImageCms.ImageCmsProfile(io.BytesIO(icc))
            propsImage['icc_profile_name'] = ImageCms.getProfileName(iccProfile).strip()
            propsImage['icc_profile_description'] = ImageCms.getProfileDescription(iccProfile).strip()
        except Exception as e:
            ex = etree.SubElement(exceptionsImageElt, 'exception')
            ex.text = str(e)
            logging.warning(("while extracting ICC profile properties from image: {}").format(str(e)))

    if image.format == "TIFF":
        # Create element object to store TIFF tags
        propsTIFFElt = etree.Element("tiff")

        # Iterate over TIFF tags, code adapted from:
        # https://stackoverflow.com/a/75357594/1209004 and
        # https://stackoverflow.com/a/46910779

        propsTIFF = {}
        for key in image.tag.keys():
            if key in TAGS_TIFF:
                propsTIFF[TAGS_TIFF[key]] = image.tag[key]

        for tag, d in propsTIFF.items():
            tiffElt = etree.Element(str(tag))

            # Don't include values of below tags
            if tag not in ['PhotoshopInfo', 'ICCProfile', 'IptcNaaInfo', 'XMP', 'ImageSourceData'] and isinstance(d, tuple):
                # extracted value is tuple, so reformat as space-delimited string
                v = ''
                if tag not in ['XResolution', 'YResolution']:
                    v = ' '.join(str(x) for x in d)
                else:
                    try:
                        # In case of XResolution / YResolution tag, parse numerator and denominator
                        # values, and convert to resolution value
                        num = d[0][0]
                        den = d[0][1]
                        v = str(num/den)
                    except Exception as e:
                        # Value doesn't have the expected layout, or the
                        # denominator is zero: report this as an image-level
                        # exception and leave the element empty
                        ex = etree.SubElement(exceptionsImageElt, 'exception')
                        ex.text = str(e)
                        logging.warning(("while parsing {} value from image: {}").format(tag, str(e)))

                tiffElt.text = v.strip()
            propsTIFFElt.append(tiffElt)

    # Exif tags
    propsExif = image.getexif()
    propsExifElt = etree.Element("exif")

    # Iterate over various Exif tags, code adapted from:
    # https://stackoverflow.com/a/75357594/1209004

    for k, v in propsExif.items():
        try:
            # This exception handler deals with any tags that Pillow doesn't recognize
            tag = TAGS_EXIF.get(k, k)
            exifElt = etree.Element(str(tag))
            if tag not in ['XMLPacket', 'InterColorProfile', 'IPTCNAA', 'ImageResources']:
                # Don't include content of these tags as text
                exifElt.text = str(v)

            propsExifElt.append(exifElt)
        except ValueError:
            pass

    for ifd_id in IFD:
        # Iterate over image file directories
        # NOTE: this can result in duplicate Exif Tags. Example: Thumbnail image is implemented as
        # separate IFD, with XResolution / YResolution tags whose values are different from
        # main resolution tags. Currently these are all lumped together in the output.
        try:
            ifd = propsExif.get_ifd(ifd_id)

            if ifd_id == IFD.GPSInfo:
                resolve = GPSTAGS
            else:
                resolve = TAGS_EXIF

            for k, v in ifd.items():
                tag = resolve.get(k, k)
                exifElt = etree.Element(str(tag))
                exifElt.text = str(v)
                propsExifElt.append(exifElt)
        except KeyError:
            pass
        except ValueError:
            pass

    # Read XMP metadata as string since dedicated getxmp function returns dictionary
    # that is difficult to work with for our purposes
    # See: https://github.com/python-pillow/Pillow/issues/5076#issuecomment-2119966091
    # this only works for TIFF!
    containsXMP = False
    if image.format == "TIFF":
        try:
            # TIFF tag 700 holds the XMP packet
            xmp = image.tag_v2[700].decode('utf-8')
            # Convert to Element object
            propsXMPElt = etree.fromstring(xmp)
            containsXMP = True
        except KeyError:
            # Image doesn't contain XMP metadata
            pass

    # Combine all of the above into one "image" element
    propsImageElt = dictionaryToElt('image', propsImage)
    if image.format == "TIFF":
        propsImageElt.append(propsTIFFElt)
    propsImageElt.append(propsExifElt)
    if containsXMP:
        propsImageElt.append(propsXMPElt)
    propsImageElt.append(exceptionsImageElt)

    return propsImageElt
283 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # IMaGe QUality Assessment for Digitisation batches
  2 | 
  3 | ## What is imgquad?
  4 | 
  5 | Imgquad is a simple tool for automated quality assessment of images in digitisation batches against a user-defined technical profile. It uses [Pillow](https://pillow.readthedocs.io/) to extract the relevant technical properties.
  6 | 
  7 | These properties are serialized to a simple XML structure, which is then evaluated against [Schematron rules](http://en.wikipedia.org/wiki/Schematron) that define the expected/required technical characteristics.
  8 | 
  9 | 
 10 | ## Installation
 11 | 
 12 | As of 2025, [uv](https://docs.astral.sh/uv/) appears to be the most straightforward tool for installing Python applications on a variety of platforms (Linux, MacOS, Windows).
 13 | 
 14 | ### uv installation
 15 | 
 16 | First, check if uv is installed on your system by typing the uv command in a terminal:
 17 | 
 18 | ```
 19 | uv
 20 | ```
 21 | 
 22 | If this results in a help message, uv is installed, and you can skip directly to the "imgquad installation" section below. If not, you first need to install uv.
 23 | 
 24 | On Linux and MacOS you can install uv with the following command:
 25 | 
 26 | ```
 27 | curl -LsSf https://astral.sh/uv/install.sh | sh
 28 | ```
 29 | 
 30 | Alternatively, you can use wget if your system doesn't have curl installed:
 31 | 
 32 | ```
 33 | wget -qO- https://astral.sh/uv/install.sh | sh
 34 | ```
 35 | 
 36 | To install uv on Windows, open a Powershell terminal, and run the following command:
 37 | 
 38 | ```
 39 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
 40 | ```
 41 | 
 42 | Regardless of the operating system, in some cases the installation script will update your system's configuration to make the location of the uv executable globally accessible. If this happens, just close your current terminal, and open a new one for these changes to take effect. Pay attention to the screen output of the installation script for any details on this.
 43 | 
 44 | ### imgquad installation
 45 | 
 46 | Use the following command to install imgquad (all platforms):
 47 | 
 48 | ```
 49 | uv tool install imgquad
 50 | ```
 51 | 
 52 | Then run imgquad once:
 53 | 
 54 | ```
 55 | imgquad
 56 | ```
 57 | 
 58 | Depending on your system, imgquad will create a folder named *imgquad* in one of the following locations: 
 59 | 
 60 | - For Linux and MacOS, it will use the location defined by environment variable *$XDG_CONFIG_HOME*. If this variable is not set, it will use the *.config* directory in the user's home folder (e.g. `/home/johan/.config/imgquad`). Note that the *.config* directory is hidden by default.
 61 | - For Windows, it will use the *AppData\Local* folder (e.g. `C:\Users\johan\AppData\Local\imgquad`).
 62 | 
 63 | The folder contains two subdirectories named *profiles* and *schemas*, which are explained in the "Profiles" and "Schemas" sections below.
 64 | 
 65 | ### upgrade imgquad
 66 | 
 67 | Use the following command to upgrade an existing imgquad installation to the latest version:
 68 | 
 69 | ```
 70 | uv tool upgrade imgquad
 71 | ```
 72 | 
 73 | ## Command-line syntax
 74 | 
 75 | The general syntax of imgquad is:
 76 | 
 77 | ```
 78 | usage: imgquad [-h] [--version] {process,list,copyps} ...
 79 | ```
 80 | 
 81 | Imgquad has three sub-commands:
 82 | 
 83 | |Command|Description|
 84 | |:-----|:--|
 85 | |process|Process a batch.|
 86 | |list|List available profiles and schemas.|
 87 | |copyps|Copy default profiles and schemas to user directory.|
 88 | 
 89 | ### process command
 90 | 
 91 | Run imgquad with the *process* command to process a batch. The syntax is:
 92 | 
 93 | ```
 94 | usage: imgquad process [-h] [--prefixout PREFIXOUT] [--outdir OUTDIR]
 95 |                        [--delimiter DELIMITER] [--verbose]
 96 |                        profile batchDir
 97 | ```
 98 | 
 99 | The *process* command expects the following positional arguments: 
100 | 
101 | |Argument|Description|
102 | |:-----|:--|
103 | |profile|This defines the validation profile. Note that any file paths entered here will be ignored, as Imgquad only accepts profiles from the profiles directory. You can just enter the file name without the path. Use the *list* command to list all available profiles.|
104 | |batchDir|This defines the batch directory that will be analyzed.|
105 | 
106 | In addition, the following optional arguments are available:
107 | 
108 | |Argument|Description|
109 | |:-----|:--|
110 | |--prefixout, -p|This defines a text prefix on which the names of the output files are based (default: "iq").|
111 | |--outdir, -o|This defines the directory where output is written (default: current working directory from which imgquad is launched).|
112 | |--delimiter, -d|This defines the delimiter that is used in the output summary file (default: ';')|
113 | |--verbose, -b|This tells imgquad to report Schematron output in verbose format.|
114 | 
115 | In the simplest case, we can call imgquad with the profile and the batch directory as the only arguments:
116 | 
117 | ```
118 | imgquad process mh-2025-tiff-600.xml ./mybatch
119 | ```
120 | 
121 | Imgquad will now recursively traverse all directories and files inside the "mybatch" directory, and analyse all image files (based on a file extension match).
122 | 
123 | ### list command
124 | 
125 | Run imgquad with the *list* command to get a list of the available profiles and schemas, as well as their locations. For example:
126 | 
127 | ```
128 | imgquad list
129 | ```
130 | 
131 | Results in:
132 | 
133 | ```
134 | Available profiles (directory /home/johan/.config/imgquad/profiles):
135 |   - mh-2025-tiff.xml
136 | Available schemas (directory /home/johan/.config/imgquad/schemas):
137 |   - mh-2025-tiff-600.sch
138 | ```
139 | 
140 | ### copyps command
141 | 
142 | If you run imgquad with the *copyps* command, it will copy the default profiles and schemas that are included in the installation over to your user directory.
143 | 
144 | **Warning:** any changes you made to the default profiles or schemas will be lost after this operation, so proceed with caution! If you want to keep any of these files, just make a copy and save them under a different name before running the *copyps* command.
145 | 
146 | ## Profiles
147 | 
148 | A profile is an XML file that defines how a digitisation batch is evaluated. Here's an example:
149 | 
150 | ```xml
151 | <?xml version="1.0"?>
152 | 
153 | <profile>
154 | 
155 | <!-- Middeleeuwse Handschriften, 2025 specs (TIFF)-->
156 | 
157 | <!-- File extensions that will be processed (case insensitive) -->
158 | <extension>tif</extension>
159 | <extension>tiff</extension>
160 | 
161 | <!-- Namespace definitions (used in summaryProperty paths)-->
162 | <ns uri="adobe:ns:meta/" prefix="x"/>
163 | <ns uri="http://www.w3.org/1999/02/22-rdf-syntax-ns#" prefix="rdf"/>
164 | <ns uri="http://ns.adobe.com/photoshop/1.0/" prefix="photoshop"/>
165 | 
166 | <!-- Properties that are written to summary file -->
167 | <summaryProperty>properties/image/format</summaryProperty>
168 | <summaryProperty>properties/image/icc_profile_name</summaryProperty>
169 | <summaryProperty>properties/image/tiff/XResolution</summaryProperty>
170 | <summaryProperty>properties/image/tiff/YResolution</summaryProperty>
171 | <summaryProperty>properties/image/tiff/ResolutionUnit</summaryProperty>
172 | <summaryProperty>properties/image/tiff/ImageWidth</summaryProperty>
173 | <summaryProperty>properties/image/tiff/ImageLength</summaryProperty>
174 | <summaryProperty>properties/image/tiff/BitsPerSample</summaryProperty>
175 | <summaryProperty>properties/image/tiff/Copyright</summaryProperty>
176 | <summaryProperty>properties/image/exif/Compression</summaryProperty>
177 | <summaryProperty>properties/image/exif/Software</summaryProperty>
178 | <summaryProperty>properties/image/exif/DateTimeOriginal</summaryProperty>
179 | <summaryProperty>properties/image/exif/Model</summaryProperty>
180 | <summaryProperty>properties/image/exif/Make</summaryProperty>
181 | <summaryProperty>properties/image/exif/ShutterSpeedValue</summaryProperty>
182 | <summaryProperty>properties/image/exif/ApertureValue</summaryProperty>
183 | <summaryProperty>properties/image/exif/ISOSpeedRatings</summaryProperty>
184 | <!-- Below properties can be encoded as either sub-elements or attributes of
185 | an rdf:Description element, so they are duplicated here to cover both cases -->
186 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/photoshop:Headline</summaryProperty>
187 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/photoshop:Credit</summaryProperty>
188 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/@photoshop:Headline</summaryProperty>
189 | <summaryProperty>properties/image/x:xmpmeta/rdf:RDF/rdf:Description/@photoshop:Credit</summaryProperty>
190 | 
191 | <!-- Schematron schema definitions -->
192 | 
193 | <schema>mh-2025-tiff-600.sch</schema>
194 | 
195 | </profile>
196 | ```
197 | 
198 | The profile is made up of the following components:
199 | 
200 | 1. One or more *extension* elements, which tell imgquad what file extensions to look for. Imgquad handles file extensions in a case-insensitive way, so *tif* covers both "rubbish.tif" and "rubbish.TIF".
201 | 2. Zero or more *ns* elements, each of which maps a namespace prefix to its corresponding uri.
202 | 3. One or more *summaryProperty* elements, which define the properties that are written to the summary file. Each summary property is expressed as an xpath expression. 
203 | 4. One or more *schema* elements, that each link a file or directory naming pattern to a Schematron file (explained in the next section).
204 | 
205 | In the example, there's only one  *schema* element, which is used for all processed images. Optionally, each *schema* element may contain *type*, *match* and *pattern* attributes, which define how a schema is linked to file or directory names inside the batch:
206 | 
207 | - If **type** is "fileName", the matching is based on the naming of an image. In case of "parentDirName" the matching uses the naming of the direct parent directory of an image.
208 | - The **match** attribute defines whether the matching pattern with the file or directory name is exact ("is") or partial ("startswith", "endswith", "contains").
209 | - The **pattern** attribute defines a text string that is used for the match.
210 | 
211 | See the [pdfquad documentation](https://github.com/KBNLresearch/pdfquad#profiles) for an example of how these attributes are used.
212 | 
213 | ### Available profiles
214 | 
215 | Currently the following profiles are included:
216 | 
217 | |Profile|Description|
218 | |:--|:--|
219 | |mh-2025-tiff.xml|Profile for digitised medieval manuscripts.|
220 | 
221 | ## Schemas
222 | 
223 | Schemas contain the Schematron rules on which the quality assessment is based. Some background information about this type of rule-based validation can be found in [this blog post](https://www.bitsgalore.org/2012/09/04/automated-assessment-jp2-against-technical-profile). Currently the following schemas are included:
224 | 
225 | ### mh-2025-tiff-600.sch
226 | 
227 | This is a schema for digitised medieval manuscripts. It includes the following checks:
228 | 
229 | |Check|Value|
230 | |:---|:---|
231 | |Image format|TIFF|
232 | |ICC profile name|eciRGB v2|
233 | |XResolution TIFF tag|tag exists|
234 | |YResolution TIFF tag|tag exists|
235 | |XResolution value|600 (+/- 1) |
236 | |YResolution value|600 (+/- 1) |
237 | |ResolutionUnit TIFF tag|tag exists|
238 | |ResolutionUnit value|2 (inches)|
239 | |ImageWidth TIFF tag|tag exists|
240 | |ImageLength TIFF tag|tag exists|
241 | |BitsPerSample TIFF tag|tag exists|
242 | |BitsPerSample value|'8 8 8'|
243 | |ICCProfile TIFF tag|tag exists|
244 | |Copyright TIFF tag|tag exists|
245 | |NewSubfileType TIFF tag|at most 1 instance of this tag|
246 | |SubIFDs TIFF tag|tag does not exist|
247 | |Compression EXIF tag|tag exists|
248 | |Compression|1 (Uncompressed)|
249 | |Software EXIF tag|tag exists|
250 | |Software value|not empty|
251 | |DateTimeOriginal EXIF tag|tag exists|
252 | |DateTimeOriginal value|not empty|
253 | |Model EXIF tag|tag exists|
254 | |Model value|not empty|
255 | |Make EXIF tag|tag exists|
256 | |Make value|not empty|
257 | |ShutterSpeedValue EXIF tag|tag exists|
258 | |ShutterSpeedValue value|not empty|
259 | |ApertureValue EXIF tag|tag exists|
260 | |ApertureValue value|not empty|
261 | |ISOSpeedRatings EXIF tag|tag exists|
262 | |ISOSpeedRatings value|not empty|
263 | |photoshop:Headline|defined in XMP metadata as either element `rdf:RDF/rdf:Description/photoshop:Headline`, or attribute `rdf:RDF/rdf:Description/@photoshop:Headline`|
264 | |photoshop:Headline value|not empty|
265 | |photoshop:Credit|defined in XMP metadata as either element `rdf:RDF/rdf:Description/photoshop:Credit`, or attribute `rdf:RDF/rdf:Description/@photoshop:Credit`|
266 | |photoshop:Credit value|not empty|
267 | 
268 | The schema also includes an additional check on any exceptions that occurred while parsing the image, as this may indicate a corrupted file.
269 | 
270 | ### mh-2025-tiff-300.sch
271 | 
272 | This schema is identical to the mh-2025-tiff-600.sch schema, except for the checks on the XResolution and YResolution values:
273 | 
274 | |Check|Value|
275 | |:---|:---|
276 | |XResolution value|300 (+/- 1) |
277 | |YResolution value|300 (+/- 1) |
278 | 
279 | ## Output
280 | 
281 | Imgquad reports the following output:
282 | 
283 | ### Comprehensive output file (XML)
284 | 
285 | For each batch, Imgquad generates one comprehensive output file in XML format. This file contains, for each image, all extracted properties, as well as the Schematron report and the assessment status. <!-- TODO add example file [Here's an example file](./examples/pq_batchtest_001.xml).-->
286 | 
287 | ### Summary file (CSV)
288 | 
289 | This is a delimited text file that summarises the analysis (the delimiter is ';' by default, and can be changed with the *--delimiter* option). At a minimum, Imgquad reports the following columns for each image:
290 | 
291 | |Column|Description|
292 | |:-----|:--|
293 | |file|Full path to the image file.|
294 | |validationSuccess|Flag with value *True* if Schematron validation was successful, and *False* if not. A value *False* indicates that the file could not be validated (e.g. because no matching schema was found, or the validation resulted in an unexpected exception)|
295 | |validationOutcome|The outcome of the Schematron validation/assessment. Value is *Pass* if file passed all tests, and *Fail* otherwise. Note that it is automatically set to *Fail* if the Schematron validation was unsuccessful (i.e. "validationSuccess" is *False*)|
296 | |validationErrors|List of validation errors (separated by "\|" characters).|
297 | 
298 | In addition, the summary file contains additional columns with the properties that are defined by the *summaryProperty* elements in the profile.
299 | 
300 | <!-- TODO add example
301 | 
302 | Here's an example:
303 | 
304 | ``` csv
305 | ```
306 | 
307 | -->
308 | 
309 | ## Licensing
310 | 
311 | Imgquad is released under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0).
312 | 
313 | ## Useful links
314 | 
315 | - [Schematron](http://en.wikipedia.org/wiki/Schematron)
316 | 
317 | 
318 | 


--------------------------------------------------------------------------------
/imgquad/imgquad.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | 
  3 | """Image Quality Assessment for Digitisation batches
  4 | 
  5 | Johan van der Knijff
  6 | 
  7 | Copyright 2025, KB/National Library of the Netherlands
  8 | 
  9 | """
 10 | 
 11 | import sys
 12 | import os
 13 | import shutil
 14 | import time
 15 | import argparse
 16 | import csv
 17 | import logging
 18 | from lxml import etree
 19 | from . import properties
 20 | from . import schematron
 21 | from . import shared
 22 | 
 23 | __version__ = "0.1.7"
 24 | 
 25 | # Create parser
 26 | parser = argparse.ArgumentParser(description="IMaGe QUality Assessment for Digitisation batches")
 27 | 
 28 | 
 29 | def parseCommandLine():
 30 |     """Parse command line"""
 31 | 
 32 |     # Sub-parsers for process and list commands
 33 | 
 34 |     subparsers = parser.add_subparsers(help='sub-command help',
 35 |                                        dest='subcommand')
 36 |     parser_process = subparsers.add_parser('process',
 37 |                                           help='process a batch')
 38 |     parser_process.add_argument('profile',
 39 |                                 action="store",
 40 |                                 help='validation profile name (use "imgquad list" to list available profiles)')
 41 |     parser_process.add_argument('batchDir',
 42 |                                 action="store",
 43 |                                 help="batch directory")
 44 |     parser_process.add_argument('--prefixout', '-p',
 45 |                                 action="store",
 46 |                                 default='iq',
 47 |                                 help="prefix of output files")
 48 |     parser_process.add_argument('--outdir', '-o',
 49 |                                 action="store",
 50 |                                 default=os.getcwd(),
 51 |                                 help="output directory")
 52 |     parser_process.add_argument('--delimiter', '-d',
 53 |                             action="store",
 54 |                             default=';',
 55 |                             help="output delimiter")
 56 |     parser_process.add_argument('--verbose', '-b',
 57 |                                 action="store_true",
 58 |                                 default=False,
 59 |                                 help="report Schematron report in verbose format")
 60 |     parser_list = subparsers.add_parser('list',
 61 |                                         help='list available profiles and schemas')
 62 |     parser_copyps = subparsers.add_parser('copyps',
 63 |                                         help='copy default profiles and schemas to \
 64 |                                             user directory, note that this will overwrite \
 65 |                                             any user-modified versions of these files!')
 66 |     parser.add_argument('--version', '-v',
 67 |                         action="version",
 68 |                         version=__version__)
 69 | 
 70 |     # Parse arguments
 71 |     args = parser.parse_args()
 72 | 
 73 |     return args
 74 | 
 75 | 
 76 | def getFilesFromTree(rootDir, extensions):
 77 |     """Walk down whole directory tree (including all subdirectories) and
 78 |     return list of those files whose extensions match extensions list
 79 |     NOTE: directory names are disabled here!!
 80 |     implementation is case insensitive (all search items converted to
 81 |     upper case internally!
 82 |     """
 83 | 
 84 |     # Convert extensions to uppercase
 85 |     extensions = [extension.upper() for extension in extensions]
 86 |     filesList = []
 87 | 
 88 |     for dirname, dirnames, filenames in os.walk(rootDir):
 89 |         # Suppress directory names
 90 |         for subdirname in dirnames:
 91 |             thisDirectory = os.path.join(dirname, subdirname)
 92 | 
 93 |         for filename in filenames:
 94 |             if filename.startswith("._"):
 95 |                 # Ignore AppleDouble resource fork files (identified here by name)
 96 |                 pass
 97 |             else:
 98 |                 thisFile = os.path.join(dirname, filename)
 99 |                 thisExtension = os.path.splitext(thisFile)[1]
100 |                 thisExtension = thisExtension.upper().strip('.')
101 |                 if extensions[0].strip() == '*' or thisExtension in extensions:
102 |                     filesList.append(thisFile)
103 |     return filesList
104 | 
105 | 
def writeXMLHeader(fileOut):
    """Start output file with XML declaration and opening root tag

    Any existing content of fileOut is overwritten.
    """
    declaration = "<?xml version='1.0' encoding='UTF-8'?>\n"
    rootOpen = "<imgquad>\n"
    payload = (declaration + rootOpen).encode('utf-8')
    with open(fileOut, "wb") as outStream:
        outStream.write(payload)
112 | 
113 | 
def writeXMLFooter(fileOut):
    """Append closing root tag to output file"""
    rootClose = "</imgquad>\n".encode('utf-8')
    with open(fileOut, "ab") as outStream:
        outStream.write(rootClose)
119 | 
120 | 
def processFile(file, verboseFlag, schemas):
    """Process one file

    Looks up the schema for the file, extracts its properties and, if a
    schema was found, validates the properties against it. Returns a "file"
    element that holds the properties, the schema, the validation status
    elements and (only if a schema was found) the validation report.
    """

    # Select schema based on directory or file name pattern defined in profile
    schemaMatchFlag, mySchema = schematron.findSchema(file, schemas)

    # Extract properties
    propertiesElt = properties.getProperties(file)

    if not schemaMatchFlag:
        # No schema match: nothing to validate against, so file fails
        validationSuccess = False
        validationOutcome = "Fail"
        logging.warning("no schema match")
    else:
        # Validate extracted properties against schema
        validationSuccess, validationOutcome, reportElt = schematron.validate(
            mySchema, propertiesElt, verboseFlag)

    if not validationSuccess:
        logging.warning("Schematron validation was not successful")

    # Assemble output element for this file: properties first, followed
    # by schema and status elements
    fileElt = etree.Element("file")
    fileElt.append(propertiesElt)
    statusItems = (("schema", mySchema),
                   ("validationSuccess", str(validationSuccess)),
                   ("validationOutcome", validationOutcome))
    for tag, text in statusItems:
        statusElt = etree.Element(tag)
        statusElt.text = text
        fileElt.append(statusElt)

    # Report only exists if validation was actually run
    if schemaMatchFlag:
        fileElt.append(reportElt)

    return fileElt
167 | 
168 | 
def findEltValue(element, path, ns):
    """ Return text of path in element, or "n/a" if it doesn't exist

    element: element on which the xpath expression is evaluated
    path: xpath expression
    ns: namespace definitions (passed to xpath as "namespaces" argument)

    If the expression matches more than one item, only the first one is
    used. For an element the element's text is returned, string results
    (e.g. attribute values) are returned as they are, and any other type
    of result is converted to a string. Exceptions that are raised by the
    xpath evaluation are passed on to the caller.
    """
    elOut = element.xpath(path, namespaces=ns)

    if isinstance(elOut, list):
        # Node-set result: use first item, if any
        if not elOut:
            return "n/a"
        first = elOut[0]
    elif isinstance(elOut, str):
        # Expression that evaluates to a string; an empty string is
        # reported as "n/a"
        return elOut if elOut else "n/a"
    else:
        # Expression that evaluates to a single non-string value
        # (presumably a number or boolean)
        first = elOut

    if isinstance(first, etree._Element):
        return first.text
    if isinstance(first, str):
        return first
    # Any other result type: report its string representation, rather
    # than failing on an undefined result
    return str(first)
187 | 
188 | 
189 | def main():
190 |     """Main function"""
191 | 
192 |     # Path to configuration dir (from https://stackoverflow.com/a/53222876/1209004
193 |     # and https://stackoverflow.com/a/13184486/1209004).
194 |     # TODO on Windows this should return the AppData/Local folder, does this work??
195 |     configpath = os.path.join(
196 |     os.environ.get('LOCALAPPDATA') or
197 |     os.environ.get('XDG_CONFIG_HOME') or
198 |     os.path.join(os.environ['HOME'], '.config'),
199 |     "imgquad")
200 | 
201 |      # Create config directory if it doesn't exist already
202 |     if not os.path.isdir(configpath):
203 |         os.mkdir(configpath)
204 |    
205 |     # Locate package directory
206 |     packageDir = os.path.dirname(os.path.abspath(__file__))
207 | 
208 |     # Profile and schema locations in installed package and config folder
209 |     profilesDirPackage = os.path.join(packageDir, "profiles")
210 |     schemasDirPackage = os.path.join(packageDir, "schemas")
211 |     profilesDir = os.path.join(configpath, "profiles")
212 |     schemasDir = os.path.join(configpath, "schemas")
213 | 
214 |     # Check if package profiles and schemas dirs exist
215 |     shared.checkDirExists(profilesDirPackage)
216 |     shared.checkDirExists(schemasDirPackage)
217 | 
218 |     # Copy profiles and schemas to respective dirs in config dir
219 |     if not os.path.isdir(profilesDir):
220 |         shutil.copytree(profilesDirPackage, profilesDir)
221 |     if not os.path.isdir(schemasDir):
222 |         shutil.copytree(schemasDirPackage, schemasDir)
223 | 
224 |     # Get input from command line
225 |     args = parseCommandLine()
226 |     action = args.subcommand
227 | 
228 |     if action == "process":
229 |         # Check if all profiles and schemas can be parsed
230 |         schematron.checkProfilesSchemas(profilesDir, schemasDir)
231 |         profile = os.path.basename(args.profile)
232 |         batchDir = os.path.normpath(args.batchDir)
233 |         prefixOut = args.prefixout
234 |         outDir = os.path.normpath(args.outdir)
235 |         delimiter = args.delimiter
236 |         verboseFlag = args.verbose
237 |     elif action == "list":
238 |         schematron.listProfilesSchemas(profilesDir, schemasDir)
239 |     elif action == "copyps":
240 |         shutil.copytree(profilesDirPackage, profilesDir, dirs_exist_ok=True)
241 |         msg = ("copied profiles from {} to {}").format(profilesDirPackage, profilesDir)
242 |         print(msg)
243 |         shutil.copytree(schemasDirPackage, schemasDir, dirs_exist_ok=True)
244 |         msg = ("copied schemas from {} to {}").format(schemasDirPackage, schemasDir)
245 |         print(msg)
246 |         sys.exit()
247 |     elif action is None:
248 |         print('')
249 |         parser.print_help()
250 |         sys.exit()
251 |     
252 |     # Add profilesDir to profile definition
253 |     profile = os.path.join(profilesDir, profile)
254 | 
255 |     # Check if files / directories exist
256 |     shared.checkFileExists(profile)
257 |     shared.checkDirExists(batchDir)
258 |     shared.checkDirExists(outDir)
259 | 
260 |     # Check if outDir is writable
261 |     if not os.access(outDir, os.W_OK):
262 |         msg = ("directory {} is not writable".format(outDir))
263 |         shared.errorExit(msg)
264 | 
265 |     # Batch dir name
266 |     batchDirName = os.path.basename(batchDir)
267 |     # Construct output prefix for this batch
268 |     prefixBatch = ("{}_{}").format(prefixOut, batchDirName)
269 |     
270 |     # Set up logging
271 |     logging.basicConfig(handlers=[logging.StreamHandler(sys.stdout)],
272 |                         level=logging.INFO,
273 |                         format='%(asctime)s - %(levelname)s - %(message)s')
274 | 
275 |     # Get file extensions, summary properties schema patterns and locations from profile
276 |     extensions, namespaces, summaryProperties, schemas = schematron.readProfile(profile, schemasDir)
277 | 
278 |     # Add Schematron namespace definition
279 |     namespaces["svrl"] = "http://purl.oclc.org/dsdl/svrl"
280 | 
281 |     if len(extensions) == 0:
282 |         msg = ("no file extensions defined in profile")
283 |         shared.errorExit(msg)
284 | 
285 |     # Summary file with quality check status (pass/fail) and properties that are selected in profile
286 |     summaryFile = os.path.normpath(("{}_summary.csv").format(prefixBatch))
287 |     summaryFile = os.path.join(outDir, summaryFile)
288 | 
289 |     # List with names of output properties
290 |     propertyNames = []
291 |     for property in summaryProperties:
292 |         propertyName = property.split('/')[-1]
293 |         propertyNames.append(propertyName)
294 | 
295 |     summaryHeadings = ["file", "validationSuccess", "validationOutcome", "validationErrors"] + propertyNames
296 | 
297 |     with open(summaryFile, 'w', newline='', encoding='utf-8') as fSum:
298 |         writer = csv.writer(fSum, delimiter=delimiter)
299 |         writer.writerow(summaryHeadings)
300 | 
301 |     listFiles = getFilesFromTree(batchDir, extensions)
302 |     # TODO: perhaps define extensions in profile?
303 | 
304 |     # start clock for statistics
305 |     start = time.time()
306 |     print("imgquad started: " + time.asctime())
307 | 
308 |     # Iterate over all files
309 |     fileOut = ("{}.xml").format(prefixBatch)
310 |     fileOut = os.path.join(outDir, fileOut)
311 |     writeXMLHeader(fileOut)
312 | 
313 |     for myFile in listFiles:
314 |         logging.info(("file: {}").format(myFile))
315 |         myFile = os.path.abspath(myFile)
316 |         fileResult = processFile(myFile, verboseFlag, schemas)
317 |         if len(fileResult) != 0:
318 |             validationSuccess = findEltValue(fileResult, 'validationSuccess', namespaces)
319 |             validationOutcome = findEltValue(fileResult, 'validationOutcome', namespaces)
320 |             with open(summaryFile, 'a', newline='', encoding='utf-8') as fSum:
321 |                 propValues = []
322 |                 for property in summaryProperties:
323 |                     propertyValue = findEltValue(fileResult, property, namespaces)
324 |                     propValues.append(propertyValue)
325 | 
326 |                 validationErrors = []
327 |                 
328 |                 failedAsserts = fileResult.xpath("schematronReport/svrl:schematron-output/svrl:failed-assert/svrl:text", namespaces=namespaces)
329 |                 for failedAssert in failedAsserts:
330 |                     validationErrors.append(failedAssert.text)
331 |                 validationErrorsString = '|'.join(validationErrors)
332 |                 
333 |                 writer = csv.writer(fSum, delimiter=delimiter)
334 |                 myRow = [myFile, validationSuccess, validationOutcome, validationErrorsString] + propValues
335 |                 writer.writerow(myRow)
336 |             # Convert output to XML and add to output file
337 |             outXML = etree.tostring(fileResult,
338 |                                     method='xml',
339 |                                     encoding='utf-8',
340 |                                     xml_declaration=False,
341 |                                     pretty_print=True)
342 | 
343 |             with open(fileOut,"ab") as f:
344 |                 f.write(outXML)
345 | 
346 |     writeXMLFooter(fileOut)
347 | 
348 |     # Timing output
349 |     end = time.time()
350 | 
351 |     print("imgquad ended: " + time.asctime())
352 | 
353 |     # Elapsed time (seconds)
354 |     timeElapsed = end - start
355 |     timeInMinutes = round((timeElapsed / 60), 2)
356 | 
357 |     print("Elapsed time: {} minutes".format(timeInMinutes))
358 | 
359 | 
360 | if __name__ == "__main__":  # True only when this module is executed as a script, not when it is imported
361 |     main()  # Run the imgquad command-line entry point
362 | 


--------------------------------------------------------------------------------