├── ipmlab ├── __init__.py ├── socketserver │ ├── __init__.py │ ├── README.md │ └── server.py ├── icons │ └── ipmlab.png ├── kbapi │ ├── __init__.py │ └── sru.py ├── __main__.py ├── configure.py ├── config.py ├── mdo.py ├── fiwalk.py ├── conf │ └── config-linux.xml ├── aaru.py ├── ddrescue.py ├── pmworker.py ├── configure_linux.py └── ipmlab.py ├── setup.cfg ├── doc ├── img │ ├── finalize.png │ ├── ipmStartup.png │ ├── loadMedium.png │ ├── ipmPostSubmit.png │ ├── ipmTitlewidget.png │ ├── ipmConfirmTitle.png │ ├── ipmCreatedBatch.png │ ├── ipmlabOpenBatch.png │ ├── write-protection.png │ └── menu-shortcut-mate.png ├── illustrations-source │ ├── write-enabled.jpg │ ├── write-protected.jpg │ ├── write-enabled-zoom.jpg │ ├── write-protected-zoom.jpg │ └── write-protection.svg ├── readme.md ├── userGuide.md └── setupGuide.md ├── ipmlab-launch.py ├── ipmlab-configure.py ├── package-pypi.sh ├── .gitignore ├── setup.py ├── readme.md ├── icon-svg └── icon.svg └── LICENSE /ipmlab/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /ipmlab/socketserver/__init__.py: -------------------------------------------------------------------------------- 1 | from .server import server 2 | -------------------------------------------------------------------------------- /doc/img/finalize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/finalize.png -------------------------------------------------------------------------------- /doc/img/ipmStartup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/ipmStartup.png -------------------------------------------------------------------------------- /doc/img/loadMedium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/loadMedium.png -------------------------------------------------------------------------------- /ipmlab/icons/ipmlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/ipmlab/icons/ipmlab.png -------------------------------------------------------------------------------- /doc/img/ipmPostSubmit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/ipmPostSubmit.png -------------------------------------------------------------------------------- /doc/img/ipmTitlewidget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/ipmTitlewidget.png -------------------------------------------------------------------------------- /doc/img/ipmConfirmTitle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/ipmConfirmTitle.png -------------------------------------------------------------------------------- /doc/img/ipmCreatedBatch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/ipmCreatedBatch.png -------------------------------------------------------------------------------- /doc/img/ipmlabOpenBatch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/ipmlabOpenBatch.png -------------------------------------------------------------------------------- /doc/img/write-protection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/write-protection.png -------------------------------------------------------------------------------- /doc/img/menu-shortcut-mate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/img/menu-shortcut-mate.png -------------------------------------------------------------------------------- /doc/illustrations-source/write-enabled.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/illustrations-source/write-enabled.jpg -------------------------------------------------------------------------------- /doc/illustrations-source/write-protected.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/illustrations-source/write-protected.jpg -------------------------------------------------------------------------------- /doc/illustrations-source/write-enabled-zoom.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/illustrations-source/write-enabled-zoom.jpg -------------------------------------------------------------------------------- /doc/illustrations-source/write-protected-zoom.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KBNLresearch/ipmlab/main/doc/illustrations-source/write-protected-zoom.jpg -------------------------------------------------------------------------------- /ipmlab/kbapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .sru import sru 2 | __version__ = '0.1.5' 3 | 4 | __all__ = ['sru'] 5 | __author__ = 'WillemJan Faber 2 | 4 | 5 | 6 | 7 | 10 | 11 | 12 | /dev/sdd 13 | 14 | 15 | /home/johan/test/ipmlab-test 16 | 17 | 18 | kb 19 | 20 | 22 | 127.0.0.1 23 | 24 | 26 | 65432 27 | 28 | 31 | True 32 | 33 | 36 | True 37 | 38 | 39 | /usr/bin/fiwalk 40 | 41 | 43 | ddrescue 44 | 45 | 46 | /usr/bin/aaru 47 | 48 | 49 | /usr/bin/ddrescue 50 | 51 | 52 | 512 53 | 54 | 55 | 4 56 | 57 | 58 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Setup script for Ipmlab""" 3 | 4 | import codecs 5 | import os 6 | import re 7 | from setuptools import setup, find_packages 8 | 9 | 10 | def read(*parts): 11 | """Read file and return contents""" 12 | path = os.path.join(os.path.dirname(__file__), *parts) 13 | with codecs.open(path, encoding='utf-8') as fobj: 14 | return fobj.read() 15 | 16 | 17 | def find_version(*file_paths): 18 | """Return version number from main module""" 19 | version_file = read(*file_paths) 20 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) 21 | if version_match: 22 | return version_match.group(1) 23 | raise RuntimeError("Unable to find version string.") 24 | 25 | 26 | INSTALL_REQUIRES = [ 27 | 'requests', 28 | 'setuptools', 29 | 'lxml' 30 | ] 31 | PYTHON_REQUIRES = '>=3.2' 32 | 33 | setup(name='ipmlab', 34 | packages=find_packages(), 35 | version=find_version('ipmlab', 'ipmlab.py'), 36 | license='Apache License 2.0', 37 | install_requires=INSTALL_REQUIRES, 38 | python_requires=PYTHON_REQUIRES, 39 | platforms=['linux'], 40 | description='Image Portable Media Like A Boss', 41 | long_description='Workflow software for automated imaging of portable storage media', 42 | author='Johan van der Knijff', 43 | author_email='johan.vanderknijff@kb.nl', 44 | maintainer='Johan van der Knijff', 45 | maintainer_email='johan.vanderknijff@kb.nl', 46 | url='https://github.com/KBNLresearch/ipmlab', 47 | download_url=('https://github.com/KBNLresearch/ipmlab/archive/' + 48 | find_version('ipmlab', 'ipmlab.py') + '.tar.gz'), 49 | package_data={'ipmlab': ['*.*', 'conf/*.*', 'icons/*.*']}, 50 | zip_safe=False, 51 | entry_points={'gui_scripts': [ 52 | 'ipmlab = ipmlab.ipmlab:main', 53 | 'ipmlab-configure = ipmlab.configure:main', 54 | ]}, 55 | classifiers=[ 56 | 'Programming Language :: Python :: 3',] 57 | ) 58 | -------------------------------------------------------------------------------- /ipmlab/socketserver/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Simple socket communication server 4 | Adapted from https://medium.com/python-pandemonium/python-socket-communication-e10b39225a4c 5 | Original code by Rodgers Ouma Mc'Alila 6 | """ 7 | 8 | import sys 9 | import socket 10 | import selectors 11 | import traceback 12 | import queue 13 | 14 | class server(): 15 | 16 | def start(self, host, port, messageQueue): 17 | """Start server""" 18 | # Create a TCP/IP socket 19 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 20 | 21 | # Bind the socket to the port 22 | server_address = (host, int(port)) 23 | print('Starting up on {} port {}'.format(*server_address)) 24 | sock.bind(server_address) 25 | 26 | # Listen for incoming connections 27 | sock.listen(1) 28 | 29 | while True: 30 | # Wait for a connection 31 | print('waiting for a connection') 32 | connection, client_address = sock.accept() 33 | try: 34 | print('connection from', client_address) 35 | myBytes = b'' 36 | # Receive the data in small chunks and retransmit it 37 | while True: 38 | data = connection.recv(16) 39 | myBytes += data 40 | if data: 41 | # print('sending data back to the client') 42 | connection.sendall(data) 43 | else: 44 | # print('no data from', client_address) 45 | break 46 | finally: 47 | # Decode data to string, and submit it to the queue 48 | myString = myBytes.decode('utf-8') 49 | messageQueue.put(myString) 50 | print("Closing current connection") 51 | connection.close() 52 | 53 | def main(): 54 | host = '127.0.0.1' 55 | port = 65432 56 | myServer = server() 57 | myServer.start(host, port, queue) 58 | 59 | if __name__ == "__main__": 60 | main() -------------------------------------------------------------------------------- /ipmlab/aaru.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """Wrapper module for Aaru""" 3 | 4 | import os 5 | import io 6 | import time 7 | import logging 8 | import subprocess as sub 9 | from . import config 10 | 11 | def extractData(writeDirectory, imageFileBaseName): 12 | """Extract data to disk image""" 13 | 14 | # Image file name 15 | imageFile = os.path.join(writeDirectory, imageFileBaseName + '.img') 16 | 17 | # Error log file name 18 | errorLogFile = os.path.join(writeDirectory, imageFileBaseName + '.error.log') 19 | 20 | # This flag defines how subprocesses are executed 21 | shellFlag = False 22 | 23 | args = [config.aaruBin] 24 | args.append("media") 25 | args.append("dump") 26 | args.append("--encoding") 27 | args.append("utf-8") 28 | args.append("--metadata") 29 | args.append(config.inDevice) 30 | args.append(imageFile) 31 | 32 | # Command line as string (used for logging purposes only) 33 | cmdStr = " ".join(args) 34 | 35 | # Unmount input device 36 | logging.info("unmounting input device") 37 | p1 = sub.Popen(['umount', config.inDevice], stdout=sub.PIPE, stderr=sub.PIPE, shell=False) 38 | out, errors = p1.communicate() 39 | 40 | # Run Aaru as subprocess 41 | logging.info("running Aaru") 42 | p2 = sub.Popen(args, stdout=sub.PIPE, stderr=sub.PIPE, shell=shellFlag) 43 | out, errors = p2.communicate() 44 | 45 | errorLogExists = False 46 | while not errorLogExists: 47 | time.sleep(2) 48 | errorLogExists = os.path.isfile(errorLogFile) 49 | 50 | # Read error log 51 | with io.open(errorLogFile, "r", encoding="utf-8") as eLog: 52 | eLogList = eLog.read().splitlines() 53 | eLog.close() 54 | 55 | eLogDelim = "######################################################" 56 | 57 | try: 58 | if eLogList[1].strip() == eLogDelim and eLogList[2].strip() == eLogDelim: 59 | readErrors = False 60 | else: 61 | readErrors = True 62 | except: 63 | readErrors = True 64 | 65 | # All results to dictionary 66 | dictOut = {} 67 | dictOut["imageFile"] = imageFile 68 | dictOut["cmdStr"] = cmdStr 69 | dictOut["status"] = p2.returncode 70 | dictOut["readErrors"] = readErrors 71 | 72 | return dictOut 73 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Ipmlab 2 | 3 | ## What it does 4 | 5 | Ipmlab (Image Portable Media Like A Boss ) provides a simple and straightforward way to save the content of offline portable media from the KB collection. It supports a variety of carrier types, such as DOS/Windows formatted 3.5" floppy disks, USB thumb drives and hard drives. 6 | 7 | For extracting the content these media, Ipmlab wraps around either [Ddrescue](https://www.gnu.org/software/ddrescue/) or [Aaru](https://www.aaru.app/) (in progress). 8 | 9 | The media images are saved in a simple batch structure. Each batch contains a batch manifest, which is a comma-delimited text file with basic information about each carriers, such as: 10 | 11 | - An identifier that links to a record in the KB catalogue. 12 | - A volume number (because carriers may span multiple volumes). 13 | - A True/False flag that indicates the status of ipmlab's imaging process. 14 | 15 | ## Using Ipmlab outside the KB 16 | 17 | By default, Ipmlab expects each carrier to be associated with a record in the KB catalogue by means of an identifier (PPN). This identifier is then used to fetch title information from the KB catalogue using a HTTP request. This effectively constrains the use of Ipmlab to materials in the KB collection. To overcome this constraint, you can disable the PPN lookup by setting the value of *enablePPNLookup* in the configuration file to *False*. More details can be found in the [setup and configuration documentation](./doc/setupGuide.md#enableppnlookup). If *enablePPNLookup* is disabled, the *PPN* data entry widget in Ipmlab's data entry GUI is replaced with a *Title* widget, which can be used for entering a free text description of each carrier. See also the section about [Processing media that are not part of the KB collection](./doc/userGuide.md#processing-media-that-are-not-part-of-the-kb-collection) in the User Guide. 18 | 19 | Moreover, it would be fairly straightforward to replace the PPN lookup by some alternative identifier that is linked to another catalogue/database (especially if it can be queried using HTTP-requests). 20 | 21 | ## Platform 22 | 23 | Linux only (e.g. Ubuntu, Linux Mint, etc.). 24 | 25 | ## Wrapped software 26 | 27 | Ipmlab wraps around either: 28 | 29 | - [Ddrescue](https://www.gnu.org/software/ddrescue/), or 30 | - [Aaru Data Preservation Suite](https://www.aaru.app/) (work in progress, see also [here](https://github.com/KBNLresearch/ipmlab/issues/23)) 31 | 32 | It also has a dependency on [dfxml_python](https://github.com/dfxml-working-group/dfxml_python), which must be installed separately because no PyPi package exists. 33 | 34 | ## Documentation 35 | 36 | * [Setup Guide](./doc/setupGuide.md) - covers installation, setup and configuration. 37 | * [User Guide](./doc/userGuide.md) - explains how to use Ipmlab. 38 | 39 | ## Contributors 40 | 41 | Written by Johan van der Knijff, except *sru.py* which was adapted from the [KB Python API](https://github.com/KBNLresearch/KB-python-API) which is written by Willem Jan Faber, and the socket server code which was adapted from an example in [Python Socket Communication](https://medium.com/python-pandemonium/python-socket-communication-e10b39225a4c) by Rodgers Ouma Mc'Alila. 42 | 43 | ## License 44 | 45 | Ipmlab is released under the Apache License 2.0. The KB Python API is released under the GNU GENERAL PUBLIC LICENSE. 46 | -------------------------------------------------------------------------------- /ipmlab/ddrescue.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """Wrapper module for ddrescue""" 3 | 4 | import os 5 | import time 6 | import logging 7 | import subprocess as sub 8 | from . import config 9 | 10 | def getNoReadErrors(rescueLine): 11 | """parse ddrescue output line for values of readErrors, return number 12 | of read errors""" 13 | lineItems = rescueLine.split(",") 14 | 15 | for item in lineItems: 16 | # Note that 'errors' item was renamed to 'read errors' between ddrescue 1.19 and 1.22 17 | # This should work in either case 18 | if "errors:" in item: 19 | reEntry = item.split(":") 20 | noReadErrors = int(reEntry[1].strip()) 21 | 22 | return noReadErrors 23 | 24 | 25 | def extractData(writeDirectory, imageFileBaseName): 26 | """Extract data to disk image""" 27 | 28 | # Image file name 29 | imageFile = os.path.join(writeDirectory, imageFileBaseName + '.img') 30 | 31 | # Map file name 32 | mapFile = os.path.join(writeDirectory, imageFileBaseName + '.map') 33 | 34 | # Error log file name 35 | #errorLogFile = os.path.join(writeDirectory, imageFileBaseName + '.error.log') 36 | 37 | # This flag defines how subprocesses are executed 38 | shellFlag = False 39 | 40 | # Number of read errors 41 | noReadErrors = 0 42 | 43 | # Arguments 44 | args = [config.ddrescueBin] 45 | args.append('-b') 46 | args.append(str(config.blockSize)) 47 | args.append('-r' + str(config.retries)) 48 | args.append('-v') 49 | args.append(config.inDevice) 50 | args.append(imageFile) 51 | args.append(mapFile) 52 | 53 | # Command line as string (used for logging purposes only) 54 | cmdStr = " ".join(args) 55 | 56 | # Unmount input device 57 | sub.run(['umount', config.inDevice], shell=False) 58 | 59 | # Run ddrescue as subprocess 60 | try: 61 | p = sub.Popen(args, stdout=sub.PIPE, stderr=sub.PIPE, 62 | shell=shellFlag, bufsize=1, universal_newlines=True) 63 | 64 | # Processing of output adapted from DDRescue-GUI by Hamish McIntyre-Bhatty: 65 | # https://git.launchpad.net/ddrescue-gui/tree/DDRescue_GUI.py 66 | 67 | line = "" 68 | char = " " 69 | 70 | # Give ddrescue plenty of time to start. 71 | time.sleep(2) 72 | 73 | # Grab information from ddrescue. (After ddrescue exits, attempt to keep reading chars until 74 | # the last attempt gave an empty string) 75 | while p.poll() is None or char != "": 76 | char = p.stdout.read(1) 77 | line += char 78 | 79 | # If this is the end of the line, process it, and send the results to the logger 80 | if char == "\n": 81 | tidy_line = line.replace("\n", "").replace("\r", "").replace("\x1b[A", "") 82 | 83 | if tidy_line != "": 84 | 85 | if "errors:" in tidy_line: 86 | # Parse this line for value of read errors 87 | noReadErrors = getNoReadErrors(tidy_line) 88 | 89 | # Reset line. 90 | line = "" 91 | 92 | # Parse any remaining lines afterwards. 93 | if line != "": 94 | tidy_line = line.replace("\n", "").replace("\r", "").replace("\x1b[A", "") 95 | if "errors:" in tidy_line: 96 | # Parse this line for value of read errors 97 | noReadErrors = getNoReadErrors(tidy_line) 98 | 99 | logging.info(tidy_line) 100 | 101 | p.wait() 102 | 103 | exitStatus = p.returncode 104 | 105 | except Exception: 106 | # I don't even want to to start thinking how one might end up here ... 107 | exitStatus = -99 108 | 109 | # Set readErrors flag 110 | readErrors = noReadErrors != 0 111 | 112 | # All results to dictionary 113 | dictOut = {} 114 | dictOut["imageFile"] = imageFile 115 | dictOut["cmdStr"] = cmdStr 116 | dictOut["status"] = exitStatus 117 | dictOut["readErrors"] = readErrors 118 | 119 | return dictOut 120 | -------------------------------------------------------------------------------- /icon-svg/icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 23 | 25 | image/svg+xml 26 | 28 | 29 | 30 | 31 | 32 | 34 | 54 | 59 | 62 | 66 | 70 | 74 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /ipmlab/pmworker.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """This module contains the code that does the actual imaging 3 | """ 4 | 5 | import os 6 | import glob 7 | import csv 8 | import hashlib 9 | import logging 10 | from . import config 11 | from . import aaru 12 | from . import ddrescue 13 | from . import mdo 14 | from . import fiwalk 15 | 16 | 17 | def generate_file_md5(fileIn): 18 | """Generate MD5 hash of file""" 19 | 20 | # fileIn is read in chunks to ensure it will work with (very) large files as well 21 | # Adapted from: http://stackoverflow.com/a/1131255/1209004 22 | 23 | blocksize = 2**20 24 | m = hashlib.md5() 25 | with open(fileIn, "rb") as f: 26 | while True: 27 | buf = f.read(blocksize) 28 | if not buf: 29 | break 30 | m.update(buf) 31 | return m.hexdigest() 32 | 33 | 34 | def generate_file_sha512(fileIn): 35 | """Generate sha512 hash of file""" 36 | 37 | # fileIn is read in chunks to ensure it will work with (very) large files as well 38 | # Adapted from: http://stackoverflow.com/a/1131255/1209004 39 | 40 | blocksize = 2**20 41 | m = hashlib.sha512() 42 | with open(fileIn, "rb") as f: 43 | while True: 44 | buf = f.read(blocksize) 45 | if not buf: 46 | break 47 | m.update(buf) 48 | return m.hexdigest() 49 | 50 | 51 | def checksumDirectory(directory): 52 | """Calculate checksums for all files in directory""" 53 | 54 | # All files in directory 55 | allFiles = glob.glob(directory + "/*") 56 | 57 | # Dictionary for storing results 58 | checksums = {} 59 | 60 | for fName in allFiles: 61 | hashString = generate_file_sha512(fName) 62 | checksums[fName] = hashString 63 | 64 | # Write checksum file 65 | try: 66 | fChecksum = open(os.path.join(directory, "checksums.sha512"), "w", encoding="utf-8") 67 | for fName in checksums: 68 | lineOut = checksums[fName] + " " + os.path.basename(fName) + '\n' 69 | fChecksum.write(lineOut) 70 | fChecksum.close() 71 | wroteChecksums = True 72 | except IOError: 73 | wroteChecksums = False 74 | 75 | return wroteChecksums 76 | 77 | 78 | def processMedium(carrierData): 79 | """Process one medium/carrier""" 80 | 81 | jobID = carrierData['jobID'] 82 | PPN = carrierData['PPN'] 83 | 84 | logging.info(''.join(['### Job identifier: ', jobID])) 85 | logging.info(''.join(['PPN: ', carrierData['PPN']])) 86 | logging.info(''.join(['Title: ', carrierData['title']])) 87 | logging.info(''.join(['Volume number: ', carrierData['volumeNo']])) 88 | 89 | # Initialise success status 90 | success = True 91 | 92 | # Create output folder for this medium 93 | dirMedium = os.path.join(config.batchFolder, jobID) 94 | logging.info(''.join(['medium directory: ', dirMedium])) 95 | if not os.path.exists(dirMedium): 96 | os.makedirs(dirMedium) 97 | 98 | logging.info('*** Extracting data using ' + config.imagingApplication + ' ***') 99 | 100 | if config.imagingApplication == "aaru": 101 | 102 | resultAaru = aaru.extractData(dirMedium, jobID) 103 | imageFile = resultAaru["imageFile"] 104 | statusAaru = resultAaru["status"] 105 | readErrors = resultAaru["readErrors"] 106 | 107 | logging.info(''.join(['aaru command: ', resultAaru['cmdStr']])) 108 | logging.info(''.join(['aaru-status: ', str(resultAaru['status'])])) 109 | 110 | if statusAaru != 0: 111 | success = False 112 | logging.error("Aaru exited with abnormal exit status") 113 | 114 | if readErrors: 115 | success = False 116 | logging.error("Aaru dumping resulted in read error(s)") 117 | 118 | elif config.imagingApplication == "ddrescue": 119 | resultDdrescue = ddrescue.extractData(dirMedium, jobID) 120 | imageFile = resultDdrescue["imageFile"] 121 | statusDdrescue = resultDdrescue["status"] 122 | readErrors = resultDdrescue["readErrors"] 123 | 124 | logging.info(''.join(['ddrescue command: ', resultDdrescue['cmdStr']])) 125 | logging.info(''.join(['ddrescue status: ', str(resultDdrescue['status'])])) 126 | 127 | if statusDdrescue != 0: 128 | success = False 129 | logging.error("Ddrescue exited with abnormal exit status") 130 | 131 | if readErrors: 132 | success = False 133 | logging.error("Ddrescue dumping resulted in read error(s)") 134 | 135 | # Generate dfxml metadata and store as file 136 | logging.info('*** Generating dfxml metadata ***') 137 | resultFiwalk = fiwalk.runFiwalk(dirMedium, jobID) 138 | statusFiwalk = resultFiwalk["status"] 139 | 140 | logging.info(''.join(['fiwalk command: ', resultFiwalk['cmdStr']])) 141 | logging.info(''.join(['fiwalk status: ', str(resultFiwalk['status'])])) 142 | 143 | if statusFiwalk != 0: 144 | success = False 145 | logging.error("Fiwalk exited with abnormal exit status") 146 | 147 | if config.enablePPNLookup: 148 | # Fetch metadata from KBMDO and store as file 149 | logging.info('*** Writing metadata from KB-MDO to file ***') 150 | 151 | successMdoWrite = mdo.writeMDORecord(PPN, dirMedium) 152 | if not successMdoWrite: 153 | success = False 154 | logging.error("Could not write metadata from KB-MDO") 155 | 156 | # Generate checksum file 157 | logging.info('*** Computing checksums ***') 158 | successChecksum = checksumDirectory(dirMedium) 159 | 160 | if not successChecksum: 161 | success = False 162 | logging.error("Writing of checksum file resulted in an error") 163 | 164 | # Create comma-delimited batch manifest entry for this carrier 165 | 166 | # Put all items for batch manifest entry in a list 167 | rowBatchManifest = ([jobID, 168 | carrierData['PPN'], 169 | carrierData['volumeNo'], 170 | carrierData['title'], 171 | str(success), 172 | str(readErrors)]) 173 | 174 | # Open batch manifest in append mode 175 | bm = open(config.batchManifest, "a", encoding="utf-8") 176 | 177 | # Create CSV writer object 178 | csvBm = csv.writer(bm, lineterminator='\n') 179 | 180 | # Write row to batch manifest and close file 181 | csvBm.writerow(rowBatchManifest) 182 | bm.close() 183 | 184 | logging.info('*** Finished processing medium ***') 185 | 186 | # Set finishedMedium flag 187 | config.finishedMedium = True 188 | 189 | return success -------------------------------------------------------------------------------- /ipmlab/configure_linux.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """Post-install / configuration for Ipmlab on Linux""" 3 | 4 | import os 5 | import sys 6 | import io 7 | import site 8 | from shutil import copyfile 9 | import logging 10 | 11 | 12 | def errorExit(msg): 13 | """Show error message and exit""" 14 | sys.stderr.write("Error: " + msg + "\n") 15 | sys.exit(1) 16 | 17 | 18 | def writeDesktopFiles(applicationsDir, packageDir): 19 | """Creates desktop file in /usr/share/applications""" 20 | 21 | # Needed to change file permissions 22 | sudoUID = os.environ.get('SUDO_UID') 23 | sudoGID = os.environ.get('SUDO_GID') 24 | 25 | # Full path to config and launcher scripts 26 | pathName = os.path.abspath(os.path.dirname(sys.argv[0])) 27 | 28 | # Locate icon file in package 29 | iconFile = os.path.join(packageDir, 'icons', 'ipmlab.png') 30 | if not os.path.isfile(iconFile): 31 | msg = 'cannot find icon file' 32 | errorExit(msg) 33 | 34 | fApplications = os.path.join(applicationsDir, 'ipmlab.desktop') 35 | 36 | # List of desktop file lines 37 | desktopList = [] 38 | desktopList.append('[Desktop Entry]') 39 | desktopList.append('Type=Application') 40 | desktopList.append('Encoding=UTF-8') 41 | desktopList.append('Name=ipmlab') 42 | desktopList.append('Comment=Image Portable Media Like A Boss') 43 | desktopList.append('Exec=' + os.path.join(pathName, 'ipmlab')) 44 | desktopList.append('Icon=' + iconFile) 45 | desktopList.append('Terminal=false') 46 | desktopList.append('Categories=Utility;System;GTK') 47 | 48 | # Write desktop file to applications directory 49 | try: 50 | logging.info('creating desktop file ' + fApplications) 51 | with io.open(fApplications, 'w', encoding='utf-8') as fA: 52 | for line in desktopList: 53 | fA.write(line + '\n') 54 | except: 55 | msg = 'Failed to create ' + fApplications 56 | errorExit(msg) 57 | 58 | 59 | def post_install(): 60 | """Install config file + pre-packaged tools to user dir + 61 | Create a Desktop shortcut to the installed software 62 | 63 | Creates the following items: 64 | - configuration directory ipmlab in ~/.config/ or /etc/ 65 | - configuration file in configuration directory 66 | - desktop file in ~/.local/share/applications/ or /usr/share/applications 67 | """ 68 | 69 | # Get evironment variables 70 | sudoUser = os.environ.get('SUDO_USER') 71 | 72 | # Package name 73 | packageName = 'ipmlab' 74 | 75 | # Scripts directory (location of launcher script) 76 | scriptsDir = os.path.split(sys.argv[0])[0] 77 | 78 | logging.info("Scripts directory: " + scriptsDir) 79 | 80 | # Package directory (parent of scriptsDir) 81 | #packageDir = os.path.abspath(os.path.join(scriptsDir, os.pardir)) 82 | packageDir = os.path.dirname(os.path.abspath(__file__)) 83 | 84 | logging.info("Package directory: " + packageDir) 85 | 86 | # Current home directory 87 | try: 88 | # If executed as root, return normal user's home directory 89 | homeDir = os.path.normpath('/home/'+ sudoUser) 90 | except TypeError: 91 | # sudoUser doesn't exist if not executed as root 92 | homeDir = os.path.normpath(os.path.expanduser("~")) 93 | 94 | logging.info("Home directory: " + homeDir) 95 | 96 | # Get locations of configRootDir and applicationsDir, 97 | # depending of install type (which is inferred from packageDir) 98 | 99 | if packageDir.startswith(homeDir): 100 | # Local install: store everything in user's home dir 101 | globalInstall = False 102 | configRootDir = os.path.join(homeDir, '.config/') 103 | applicationsDir = os.path.join(homeDir, '.local/share/applications/') 104 | else: 105 | # Global install 106 | globalInstall = True 107 | configRootDir = os.path.normpath('/etc/') 108 | applicationsDir = os.path.normpath('/usr/share/applications') 109 | 110 | logging.info("Applications directory: " + applicationsDir) 111 | 112 | # Create applicationsDir and configRootDir if they don't exist already 113 | if not os.path.isdir(configRootDir): 114 | os.mkdir(configRootDir) 115 | if not os.path.isdir(applicationsDir): 116 | os.mkdir(applicationsDir) 117 | 118 | # For a global installation this script must be run as root 119 | if globalInstall and sudoUser is None: 120 | msg = 'this script must be run as root for a global installation' 121 | errorExit(msg) 122 | 123 | # Check if directories exist and that they are writable 124 | if not os.access(configRootDir, os.W_OK | os.X_OK): 125 | msg = 'cannot write to ' + configRootDir 126 | errorExit(msg) 127 | 128 | if not os.access(applicationsDir, os.W_OK | os.X_OK): 129 | msg = 'cannot write to ' + applicationsDir 130 | errorExit(msg) 131 | 132 | # Create configuration directory if it doesn't already exist 133 | configDir = os.path.join(configRootDir, packageName) 134 | 135 | logging.info("Configuration directory: " + configDir) 136 | 137 | if not os.path.isdir(configDir): 138 | os.mkdir(configDir) 139 | 140 | # Path to configuration file 141 | fConfig = os.path.join(configDir, 'config.xml') 142 | 143 | if not os.path.isfile(fConfig): 144 | # No existing config file at destination, so copy from package. 145 | # Location is /ipmlab/conf/config.xml in 'site-packages' directory 146 | # if installed with pip) 147 | 148 | logging.info("Copying configuration file ...") 149 | 150 | # Locate global site-packages dir (this returns multiple entries) 151 | sitePackageDirsGlobal = site.getsitepackages() 152 | 153 | sitePackageDirGlobal = "" 154 | 155 | # Assumptions: site package dir is called 'site-packages' and is 156 | # unique (?) 157 | for directory in sitePackageDirsGlobal: 158 | if 'site-packages' in directory: 159 | sitePackageDirGlobal = directory 160 | 161 | logging.info("Global site package directory: " + sitePackageDirGlobal) 162 | 163 | # Locate user site-packages dir 164 | sitePackageDirUser = site.getusersitepackages() 165 | logging.info("User site package directory: " + sitePackageDirUser) 166 | 167 | # Determine which site package dir to use 168 | # Convert to lowercase because result of site.getsitepackages() 169 | # sometimes results in lowercase output (observed with Python 3.7 on Windows 10) 170 | if sitePackageDirGlobal != "" and sitePackageDirGlobal.lower() in packageDir.lower(): 171 | sitePackageDir = sitePackageDirGlobal 172 | elif sitePackageDirUser.lower() in packageDir.lower(): 173 | sitePackageDir = sitePackageDirUser 174 | else: 175 | msg = 'could not establish package dir to use' 176 | errorExit(msg) 177 | logging.info("Site package directory: " + sitePackageDir) 178 | 179 | # Construct path to config file 180 | configFilePackage = os.path.join(sitePackageDir, packageName, 181 | 'conf', 'config-linux.xml') 182 | 183 | if os.path.isfile(configFilePackage): 184 | try: 185 | copyfile(configFilePackage, fConfig) 186 | logging.info("copied configuration file!") 187 | except IOError: 188 | msg = 'could not copy configuration file to ' + fConfig 189 | errorExit(msg) 190 | # This should never happen but who knows ... 191 | else: 192 | msg = 'no configuration file found in package' 193 | errorExit(msg) 194 | 195 | writeDesktopFiles(applicationsDir, packageDir) 196 | 197 | sys.stdout.write('Ipmlab configuration completed successfully!\n') 198 | sys.exit(0) 199 | 200 | 201 | def main(): 202 | """Main function""" 203 | 204 | # Logging configuration 205 | logging.basicConfig(level=logging.INFO, 206 | format='%(asctime)s - %(levelname)s - %(message)s') 207 | post_install() 208 | 209 | 210 | if __name__ == "__main__": 211 | main() 212 | -------------------------------------------------------------------------------- /doc/userGuide.md: -------------------------------------------------------------------------------- 1 | # Ipmlab User Guide 2 | 3 | This User Guide assumes that Ipmlab and its dependencies have been installed and configure. If this is not the case, consult the [Setup Guide](./setupGuide.md) first. 4 | 5 | ## Getting started 6 | 7 | Launch Ipmlab from your operating system's launcher menu. For Linux Mint (MATE), you can find an Ipmlab launcher in the "System Tools" menu, as shown below (notice the floppy-shaped icon): 8 | 9 | ![](./img/menu-shortcut-mate.png) 10 | 11 | After clicking on it, the following window appears: 12 | 13 | ![](./img/ipmStartup.png) 14 | 15 | Upon startup, you have three options: 16 | 17 | * Create a *New* batch (see below) 18 | * *Open* an existing batch 19 | * *Quit* Ipmlab 20 | 21 | ## Create a batch 22 | 23 | Let's create a new batch. Click on the top-left *New* button. Ipmlab will respond with: 24 | 25 | ![](./img/ipmCreatedBatch.png) 26 | 27 | Now press *OK*. The console widget at the bottom of the Ipmlab window now shows the full path to the newly created batch folder. 28 | 29 | ### Naming of batches 30 | 31 | The batch name is automatically generated. It contains of a prefix (defined by the *prefixBatch* variable in Ipmlab's configuration file), followed by a [Universally Unique Identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier), which is based on the hardware address and the current time ("version 1" UUID). This ensures that batch names are always unique if Ipmlab is run on different machines in parallel. 32 | 33 | ## Process a carrier 34 | 35 | Now let's process a carrier (such as a floppy disk or thumb drive). We'll assume here that the *enablePPNLookup* flag in the configuration file is set to *True*: the carrier is part of the KB collection, and has a known PPN identifier associated with it. In this example we'll assume we're using the "INP spellingschijf" 3.5" floppy disk. The catalogue record can of this electronic publication can be found here: 36 | 37 | 38 | 39 | We start by entering the required fields: 40 | 41 | * *PPN* is the PPN that is associated with the carrier (here: *144082667*). 42 | * Leave *Volume number* at the default value of *1* (the assignment of volume numbers and how they are related to carrier type is explained further below). 43 | 44 | Now press the *Submit* button. Ipmlab now tries to look up up the entered *PPN* in the catalogue. If a matching record is found it will display the corresponding title, and ask for confirmation: 45 | 46 | ![](./img/ipmConfirmTitle.png) 47 | 48 | If the displayed title doesn't match your item (because you accidentally entered the wrong *PPN*), you can press *No*, enter the (correct) *PPN* and then pres *Submit* again. Once the title is as expected, press *Yes*. Another dialog pops up: 49 | 50 | ![](./img/loadMedium.png) 51 | 52 | Before loading a floppy, always verify that its write-protect tab is in the **open** (protected) position! See also the figure below: 53 | 54 | ![](./img/write-protection.png) 55 | 56 | Now load the first floppy into the floppy reader. After that, press *OK*. The details (*PPN*, title) of the carrier are added as an entry to the widget in the centre of the *Ipmlab* window: 57 | 58 | ![](./img/ipmPostSubmit.png) 59 | 60 | Subsequently Ipmlab starts processing the floppy. This involves the following steps: 61 | 62 | 1. Extract the contents of the medium to an image file using Aaru (Aaru also creates a metadata sidecar files and some other files). 63 | 2. Compute SHA-512 checksums for all generated files. 64 | 3. Add an entry for the carrier in the *batch manifest* (explained further below). 65 | 66 | ## Process more carriers 67 | 68 | To process additional carriers, simply repeat the steps from the previous section. For multi-volume PPNs you can use the *Use previous* button. After pressing it, you will see the most recently submitted *PPN* in the *PPN* entry widget, and the *Volume number* widget increases the previously entered value by 1. 69 | 70 | ## Finalize a batch 71 | 72 | When you're done entering new carriers, press the *Finalize* button at the top of the Ipmlab window. This will trigger a confirmation dialog: 73 | 74 | ![](./img/finalize.png) 75 | 76 | Then press *Yes*. After a short while another information dialog will pop up telling you that Ipmlab has finished processing this batch. After pressing *OK*, Ipmlab will reset to its initial state. You can now create a new batch, open an existing one, or exit *Ipmlab*. 77 | 78 | ## Quitting Ipmlab 79 | 80 | If you press the *Quit* button, Ipmlab will quit after first finish the processing of the current carrier. 81 | 82 | ## Opening an existing batch 83 | 84 | After pressing the *Open* button upon startup, you will see a file dialog that shows all batch folders in Ipmlab's root directory (*rootDir*): 85 | 86 | ![](./img/ipmlabOpenBatch.png) 87 | 88 | This allows you to continue a batch that was interrupted with the *Quit* command. 89 | 90 | ## enableSocketAPI option 91 | 92 | The *enableSocketAPI* option allows one to send *PPN* or *Title* values to the corresponding Ipmlab entry widgets from an external application through a [socket connection](https://en.wikipedia.org/wiki/Network_socket). It can be activated by setting the value of *enableSocketAPI* in the configuration file to *True*: 93 | 94 | ```xml 95 | True 96 | ``` 97 | 98 | When this option is activated, Ipmlab launches a server that listens on a user-defined host address (default: localhost) and port number (default: 65432) combination for incoming requests. This is particularly useful if the *PPN* identifiers or titles are entered from some external database application. In order to communicate with Ipmlab, this application needs to be able to send socket requests. This [Iromlab socket client demo](https://github.com/KBNLresearch/iromlab-socketclient) shows how to do this in Python. 99 | 100 | ## All carriers of a PPN must be in same batch 101 | 102 | Carriers that belong to one particular *PPN* must *always* be in the same batch. This is because the batches are processed into ingest-ready Submission Information Packages (SIPs) further down the processing chain, and all carriers that are part of a *PPN* are grouped into one SIP. This doesn't work if a *PPN* is spread across multiple batches. 103 | 104 | ## Processing carriers that are not part of the KB collection 105 | 106 | For carriers that are not part of the KB collection, it is recommended to set the *enablePPNLookup* flag in Ipmlab's configuration file to *False*: 107 | 108 | ```xml 109 | False 110 | ``` 111 | 112 | With this setting, the *PPN* widget in the Ipmlab interface is replaced by a *Title* entry widget. You can use it to manually enter a title (or other description) for each carrier: 113 | 114 | ![](./img/ipmTitlewidget.png) 115 | 116 | ## The batch manifest 117 | 118 | The batch manifest is a comma-delimited text file named *manifest.csv* which is located at the root of a batch. It contains all information that is needed to process the batch into ingest-ready Submission Information Packages further down the processing chain. For each processed carrier, it contains the following fields: 119 | 120 | jobID,PPN,volumeNo,title,success,readErrors 121 | 122 | 1. *jobID* - internal carrier-level identifier. The image file(s) of this carrier are stored in an eponymous directory within the batch. 123 | 2. *PPN* - identifier of the physical item in the KB Collection to which this carrier belongs. For the KB case this is the PPN identifier in the KB catalogue. If *enablePPNLookup* is set to *False*, it will be an empty (zero-length) string. 124 | 3. *volumeNo* - for intellectual entities that span multiple carriers, this defines the volume number (1 for single-volume items). 125 | 4. *title* - text string with the title of the carrier (or the publication it is part of). If *enablePPNLookup* is *True* the title field is extracted from the KB catalogue record. If *enablePPNLookup* is *False* the manually entered *Title* value is used. 126 | windows/win32/api/winioctl/ns-winioctl-get_media_types). 127 | 5. *success* - True/False flag that indicates whether the imaging was completed successfully. A *False* value indicates problems. 128 | 6. *readErrors* - a True/False flag that indicates whether Ddrescue or Aaru encountered read errors. 129 | 130 | The first line of the file contains column headers. 131 | 132 | Example: 133 | 134 | ```csv 135 | jobID,PPN,volumeNo,title,success,readErrors 136 | ce5eca7e-f179-11ec-853c-0800272c26ff,144082667,1,INP spellingschijf,True,False 137 | d79c52c1-f179-11ec-9f9f-0800272c26ff,144082667,2,INP spellingschijf,True,False 138 | ``` 139 | 140 | ## The log file 141 | 142 | Each batch contains a log file *batch.log*. It contains detailed information about the detection and imaging subprocesses. If anything unexpected happens, checking the batch log will help you identify the problem. 143 | 144 | ## The End Of Batch file 145 | 146 | After a batch is finalized, Ipmlab writes an "end of batch file (*eob.txt*) to the batch folder. It contains the text string "EOB". This file is added primarily for post-imaging quality control reasons. 147 | 148 | ## The version file 149 | 150 | Each batch contains a file *version.txt*, which holds the Ipmlab version number. 151 | 152 | ## Created files for each carrier 153 | 154 | For each carrier, Ipmlab creates a folder in the batch folder. The name of each folder is (again) a [Universally Unique Identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier), which is based on the hardware address and the current time ("version 1" UUID). Each of these folders contain the following files (with a base name that corresponds to the UUID): 155 | 156 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.img* - image file (file name follows UUID). 157 | - *dfxml.xml* - report file in [Digital Forensics XML format](https://en.wikipedia.org/wiki/Digital_Forensics_XML); includes listing of all files on the carrier. 158 | - *meta-kbmdo.xml* - bibliographic metadata from KB catalogue (only if *enablePPNLookup* is enabled). 159 | - *checksums.sha512* - checksum file with SHA-512 hashes of all files in this directory. 160 | 161 | If Ddrescue is used as the imaging application, the following additional file is generated: 162 | 163 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.map* - [ddrescue map file](https://www.gnu.org/software/ddrescue/manual/ddrescue_manual.html#Mapfile-structure). 164 | 165 | When Aaru is used, this results in the following additional files: 166 | 167 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.cicm.xml* - Aaru metadata file. Contains various checksums, and filesystem and file-level metadata. 168 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.resume.xml* - Aaru resume mapfile (analogous to ddrescue map file). 169 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.log* - Aaru dump log. 170 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.error.log* - Aaru error log. 171 | - *xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.ibg* - undocument Aaru file (this looks like some [ImageBurn-specific format](https://forum.imgburn.com/topic/15561-issues-reating-audio-cd/?do=findComment&comment=121649)). 172 | - Various files ending with a *.bin* file extension - these are written by Aaru (but they are all undocumented, don't know if we should keep them?). 173 | 174 | 175 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /doc/illustrations-source/write-protection.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 23 | 25 | 33 | 38 | 39 | 47 | 52 | 53 | 61 | 67 | 68 | 69 | 88 | 90 | 91 | 93 | image/svg+xml 94 | 96 | 97 | 98 | 99 | 100 | 105 | 114 | 123 | 132 | 141 | Write enabled(tab closed) 157 | Write protected(tab open) 173 | 180 | 187 | 194 | 201 | 206 | 211 | 216 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /doc/setupGuide.md: -------------------------------------------------------------------------------- 1 | # Ipmlab Setup Guide 2 | 3 | Before trying to set up Ipmlab, check if the following requirements are met: 4 | 5 | * The installation platform is Linux-based (e.g. Unbuntu or Linux Mint). 6 | * Python 3.8 (or more recent) is installed on the target platform. Older 3.x versions *may* (but are not guaranteed to) work. 7 | 8 | Getting Ipmlab up running requires a number of installation and configuration steps: 9 | 10 | 1. Update the package index 11 | 1. Add user to "disk" group 12 | 1. Disable automatic mounting of removable media 13 | 1. Install Tkinter if it is not installed already 14 | 1. Install pip if it is not installed already 15 | 1. Install Sleuthkit 16 | 1. Install either ddrescue, or the [Aaru Data Preservation Suite](https://www.aaru.app/) software (and configure it), or both (note: Aaru is not working as of yet!). 17 | 1. Install Ipmlab 18 | 1. Configure Ipmlab 19 | 20 | Each step is described in detail below. 21 | 22 | ## Update package index 23 | 24 | As we'll be installing a few Debian packages, it's a good idea to first update the package index, to ensure the most recent versions of all packages are installed: 25 | 26 | ```bash 27 | sudo apt-get update 28 | ``` 29 | 30 | ## Add user to disk group 31 | 32 | For Linux, in order to have access to block devices as a non-root user, you must add your user name to the disk group. You can do this with the command below: 33 | 34 | ```bash 35 | sudo adduser $USER disk 36 | ``` 37 | 38 | The user is now added to the 'disk' system group. Now log out, and then log in again for the changes to take effect. 39 | 40 | ## Disable automatic mounting of removable media 41 | 42 | In order to minimise any risks of accidental write actions to e.g. floppy disks that are processed with Ipmlab, it is strongly suggested to disable automatic mounting of removable media. The exact command depends on the Linux desktop you're using. For the [MATE](https://mate-desktop.org/) desktop use this: 43 | 44 | ```bash 45 | gsettings set org.mate.media-handling automount false 46 | ``` 47 | 48 | For a [GNOME](https://www.gnome.org/) desktop use this command: 49 | 50 | ```bash 51 | gsettings set org.gnome.desktop.media-handling automount false 52 | ``` 53 | 54 | And for the [Cinnamon](https://projects.linuxmint.com/cinnamon/) desktop: 55 | 56 | ```bash 57 | gsettings set org.cinnamon.desktop.media-handling automount-open false 58 | ``` 59 | 60 | You can use the below command to verify the automount setting (MATE): 61 | 62 | ```bash 63 | gsettings get org.mate.media-handling automount 64 | ``` 65 | 66 | Or, for GNOME: 67 | 68 | ```bash 69 | gsettings get org.gnome.desktop.media-handling automount 70 | ``` 71 | 72 | And finally for Cinnamon: 73 | 74 | ```bash 75 | gsettings get org.cinnamon.desktop.media-handling automount-open 76 | ``` 77 | 78 | If all goes well, this will result in: 79 | 80 | ``` 81 | false 82 | ``` 83 | 84 | Please be aware that disabling the automount feature does not provide tamper-proof write blocking! It only works at the level of the operating system's default file manager, and it won't keep a user from manually mounting a device. Also, the *gsettings* command only works at the user level. This means that for someone who logs in with a different user name, the default automount setting applies (which means automount will be enabled). 85 | 86 | If possible, use a forensic write blocker if more robust write-blocking is needed, but note that these devices [may not always work as expected](https://github.com/KBNLresearch/ipmlab/issues/26) for USB adapter devices (such as USB 3.5" floppy drives). 87 | 88 | ## Install Tkinter 89 | 90 | You may need to install Tkinter, if it is not installed already. You can install it using the OS's package manager (there is no PyInstaller package for Tkinter). If you're using *apt* this should work: 91 | 92 | ```bash 93 | sudo apt install python3-tk 94 | ``` 95 | 96 | ## Install pip 97 | 98 | You need Pip to install Python packages. Use this command to install it: 99 | 100 | ```bash 101 | sudo apt install python3-pip 102 | 103 | ``` 104 | 105 | ## Install Sleuthkit 106 | 107 | We need Sleuthkit for extracting Dfxml metadata. To install, use: 108 | 109 | ```bash 110 | sudo apt install sleuthkit 111 | ``` 112 | 113 | ## Install Ddrescue 114 | 115 | Install ddrescue using this command: 116 | 117 | ```bash 118 | sudo apt install gddrescue 119 | ``` 120 | 121 | ## Install Aaru 122 | 123 | Not supported yet, coming soon! 124 | 125 | 165 | 166 | ## Install Ipmlab 167 | 168 | The recommended way to install Ipmlab is to use *pip3*, as this will automatically install any Python packages that are used by Ipmlab (with the exception of dfxml_python, which was explained above). 169 | 170 | For a single-user installation, install using: 171 | 172 | ```bash 173 | pip3 install --user ipmlab 174 | ``` 175 | 176 | For a global installation (this allows all users on the machine to use Ipmlab), use this (this might require sudo privilege): 177 | 178 | ```bash 179 | pip3 install ipmlab 180 | ``` 181 | 182 | ## Configuration 183 | 184 | Before Ipmlab is ready for use you need to configure it. 185 | 186 | If you installed Ipmlab as a global install, just enter: 187 | 188 | ```bash 189 | ipmlab-configure 190 | ``` 191 | 192 | For a user install, you may need to enter the full path to the configuration script: 193 | 194 | ```bash 195 | ~/.local/bin/ipmlab-configure 196 | ``` 197 | 198 | The output should look something like this: 199 | 200 | ``` 201 | 2022-11-15 16:37:53,460 - INFO - Scripts directory: /home/johan/.local/bin 202 | 2022-11-15 16:37:53,460 - INFO - Package directory: /home/johan/.local 203 | 2022-11-15 16:37:53,460 - INFO - Home directory: /home/johan 204 | 2022-11-15 16:37:53,460 - INFO - Applications directory: /home/johan/.local/share/applications/ 205 | 2022-11-15 16:37:53,460 - INFO - Configuration directory: /home/johan/.config/ipmlab 206 | 2022-11-15 16:37:53,461 - INFO - Copying configuration file ... 207 | 2022-11-15 16:37:53,461 - INFO - Global site package directory: 208 | 2022-11-15 16:37:53,461 - INFO - User site package directory: /home/johan/.local/lib/python3.8/site-packages 209 | 2022-11-15 16:37:53,461 - INFO - Site package directory: /home/johan/.local/lib/python3.8/site-packages 210 | 2022-11-15 16:37:53,461 - INFO - copied configuration file! 211 | 2022-11-15 16:37:53,461 - INFO - creating desktop file /home/johan/.local/share/applications/ipmlab.desktop 212 | Ipmlab configuration completed successfully! 213 | ``` 214 | 215 | ## Editing the configuration file 216 | 217 | The automatically generated configuration file needs some further manual editing, which is explained in the sections below. 218 | 219 | ### Configuration file location 220 | 221 | If you did a user install, the configuration file is located at: 222 | 223 | ``` 224 | ~/.config/ipmlab/config.xml 225 | ``` 226 | 227 | For a global install, you can find it here: 228 | 229 | ``` 230 | /etc/ipmlab/config.xml 231 | ``` 232 | 233 | ### Configuration variables 234 | 235 | Now open the configuration file *config.xml* in a text editor, or, alternatively, use a dedicated XML editor. Carefully go through all the variables (which are defined as XML elements), and modify them if necessary. Here is an explanation of all variables. 236 | 237 | #### inDevice 238 | 239 | This defines the path to the device you want to use for imaging. You need to use a device path such as: 240 | 241 | ```xml 242 | /dev/sdd 243 | ``` 244 | 245 | If you're not sure about the device path to use, do this: 246 | 247 | 1. Make sure the floppy drive is connected to your machine, with a floppy inserted. 248 | 2. Then issue the following command to get info about all available storage devices: 249 | 250 | ```bash 251 | sudo lshw -short -class disk 252 | ``` 253 | 254 | Example output: 255 | 256 | ``` 257 | H/W path Device Class Description 258 | ==================================================== 259 | /0/13/0.0.0 /dev/sda disk 1TB TOSHIBA DT01ACA1 260 | /0/14/0.0.0 /dev/sdb disk 250GB WDC WDS250G2B0A 261 | /0/15/0.0.0 /dev/sdc disk 5TB Expansion HDD 262 | /0/16/0.0.0 /dev/sdd disk 1474KB USB-FDU 263 | ``` 264 | 265 | In the list of output devices, look for a device with a small (typically 1474 or 737 KB) storage capacity. In the example above `/dev/sdd` is the floppy drive. 266 | 267 | #### rootDir 268 | 269 | This defines the root directory where Ipmlab will write its data. Ipmlab output is organised into *batches*, and each batch is written to *rootDir*. Make sure to pick an existing directory with plenty of space. 270 | 271 | Example: 272 | 273 | ```xml 274 | /home/johan/test/ipmlab-test 275 | ``` 276 | 277 | #### prefixBatch 278 | 279 | This is a text prefix that is added to the automatically-generated batch names: 280 | 281 | ```xml 282 | kb 283 | ``` 284 | 285 | #### socketHost 286 | 287 | Defines the host address that is used if the socket API is enabled (see below). Use 127.0.0.1 for localhost: 288 | 289 | ```xml 290 | 127.0.0.1 291 | ``` 292 | 293 | #### socketPort 294 | 295 | Defines the port that is used if the socket API is enabled (see below): 296 | 297 | ```xml 298 | 65432 299 | ``` 300 | 301 | #### enablePPNLookup 302 | 303 | Flag that controls whether PPN lookup is enabled. If set to *True*, the Ipmlab interface contains a widget for entering a *PPN* identifier. After submitting, Ipmlab then performs a lookup on the PPN in the KB catalogue, and automatically extracts the title of the corresponding entry (which is then added to the batch manifest). If set to *False*, the Ipmlab interface contains a widget in which an operator can manually enter a *Title* string; the entered value is written to the batch manifest. In this case no PPN lookup is performed, and the PPN-value in the batch manifest will be a zero-length string. 304 | 305 | Allowed values: 306 | 307 | ```xml 308 | True 309 | ``` 310 | 311 | and: 312 | 313 | ```xml 314 | False 315 | ``` 316 | 317 | #### enableSocketAPI 318 | 319 | This is a flag that -if set to *True*- enables Ipmlab to pick up Title and PPN info from a client application through a socket interface (disabled by default): 320 | 321 | ```xml 322 | False 323 | ``` 324 | 325 | #### fiwalkBin 326 | 327 | This points to the location of Fiwalk binary: 328 | 329 | ```xml 330 | /usr/bin/fiwalk 331 | ``` 332 | 333 | #### imagingApplication 334 | 335 | This sets the application that is used for imaging. Allowed values are "aaru" and "ddrescue": 336 | 337 | ```xml 338 | ddrescue 339 | ``` 340 | 341 | #### aaruBin 342 | 343 | This points to the location of Aaru binary: 344 | 345 | ```xml 346 | /usr/bin/aaru 347 | ``` 348 | 349 | #### ddrescueBin 350 | 351 | This points to the location of ddrescue binary: 352 | 353 | ```xml 354 | /usr/bin/ddrescue 355 | ``` 356 | 357 | #### blockSize 358 | 359 | This defines the block size used by ddrescue: 360 | 361 | ```xml 362 | 512 363 | ``` 364 | 365 | #### retries 366 | 367 | This sets the maximum number of times ddrescue will try to read an unreadable sector: 368 | 369 | ```xml 370 | 4 371 | ``` 372 | -------------------------------------------------------------------------------- /ipmlab/kbapi/sru.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Python API for KB SRU 4 | """ 5 | 6 | import sys 7 | import urllib 8 | import requests 9 | from lxml import etree 10 | 11 | SRU_BASEURL = 'http://jsru.kb.nl/sru/sru' 12 | SRU_BASEURL += '?version=1.2&maximumRecords=%i' 13 | SRU_BASEURL += '&operation=searchRetrieve' 14 | SRU_BASEURL += '&startRecord=%i' 15 | SRU_BASEURL += '&recordSchema=%s' 16 | SRU_BASEURL += '&x-collection=%s&query=%s' 17 | 18 | SETS = {'ANP': {'collection': 'ANP', 19 | 'description_en': 'Radio Bulletins ANP Press Agency', 20 | 'description_nl': 'ANP Radiobulletins Digitaal', 21 | 'metadataPrefix': 'didl', 22 | 'recordschema': 'dcx', 23 | 'setname': 'anp', 24 | 'time_period': [1937, 1989]}, 25 | 'DPO': {'collection': 'DPO_boekdeel', 26 | 'description_en': 'Early Dutch Books Online', 27 | 'description_nl': 'Early Dutch Books Online', 28 | 'metadataPrefix': 'didl', 29 | 'recordschema': 'ddd', 30 | 'setname': 'DPO', 31 | 'time_period': [1781, 1800]}, 32 | 'BYVANCK': {'description_en': 'Medieval Illuminated Manuscripts', 33 | 'description_nl': 'Middeleeuwse Verluchte Handschriften', 34 | 'metadataPrefix': 'dcx', 35 | 'setname': 'BYVANCK', 36 | 'time_period': [500, 1500]}, 37 | 'SGD': {'description_en': 'States General Digital', 38 | 'description_nl': 'Staten-Generaal Digitaal', 39 | 'metadataPrefix': 'dcx', 40 | 'setname': 'sgd:register', 41 | 'time_period': [1962, 1994]}, 42 | 'GGC': {'collection': 'GGC', 43 | 'description_en': 'General Catalogue KB', 44 | 'description_nl': 'Algemene Catalogus KB', 45 | 'metadataPrefix': 'dcx', 46 | 'recordschema': 'dcx', 47 | 'setname': 'ggc', 48 | 'time_period': [1937, 2021]}} # No idea what to use here? 49 | 50 | # Name spaces in GGC records 51 | 52 | srw_ns = 'http://www.loc.gov/zing/srw/' 53 | tel_ns = 'http://krait.kb.nl/coop/tel/handbook/telterms.html' 54 | xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance' 55 | dc_ns = 'http://purl.org/dc/elements/1.1/' 56 | dcterms_ns = 'http://purl.org/dc/terms/' 57 | dcx_ns = 'http://krait.kb.nl/coop/tel/handbook/telterms.html' 58 | 59 | NSMAPGGC = {"srw": srw_ns, 60 | "tel": tel_ns, 61 | "xsi": xsi_ns, 62 | "dc": dc_ns, 63 | "dcterms": dcterms_ns, 64 | "dcx": dcx_ns} 65 | 66 | 67 | class response(): 68 | def __init__(self, record_data, sru): 69 | self.record_data = record_data 70 | self.sru = sru 71 | 72 | def getElementText(self, tagName, attributeName, attributeValue): 73 | # Returns text content of all elements for which tag matches tagName, 74 | # and attribute value equals attributeValue. Set attributeName to empty 75 | # string to get all tagName matches. 76 | textFields = [] 77 | for r in self.record_data.iter(): 78 | if r.tag == tagName: 79 | if attributeName != '': 80 | try: 81 | if r.attrib[attributeName] == attributeValue: 82 | textFields.append(r.text) 83 | except KeyError: 84 | pass 85 | else: 86 | textFields.append(r.text) 87 | return textFields 88 | 89 | @property 90 | def records(self): 91 | if self.sru.nr_of_records == 0: 92 | record_data = "" 93 | else: 94 | ns = {'zs': 'http://www.loc.gov/zing/srw/'} 95 | record_data = self.record_data.xpath("zs:records/zs:record", 96 | namespaces=ns)[0] 97 | return record(record_data, self.sru) 98 | 99 | # Below property functions all return a list with all instances that satisfy 100 | # criteria 101 | 102 | @property 103 | def typesDutch(self): 104 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}type', 105 | '{http://www.w3.org/XML/1998/namespace}lang', 106 | 'nl')) 107 | 108 | @property 109 | def typesDCMI(self): 110 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}type', 111 | '{http://www.w3.org/2001/XMLSchema-instance}type', 112 | 'DCMIType')) 113 | 114 | @property 115 | def identifiersISBN(self): 116 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier', 117 | '{http://www.w3.org/2001/XMLSchema-instance}type', 118 | 'dcterms:ISBN')) 119 | 120 | @property 121 | def identifiersBrinkman(self): 122 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier', 123 | '{http://www.w3.org/2001/XMLSchema-instance}type', 124 | 'dcx:Brinkman')) 125 | 126 | @property 127 | def identifiersURI(self): 128 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier', 129 | '{http://www.w3.org/2001/XMLSchema-instance}type', 130 | 'dcterms:URI')) 131 | 132 | @property 133 | def identifiersOCLC(self): 134 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier', 135 | '{http://www.w3.org/2001/XMLSchema-instance}type', 136 | 'OCLC')) 137 | 138 | @property 139 | def languagesDutch(self): 140 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}language', 141 | '{http://www.w3.org/XML/1998/namespace}lang', 142 | 'nl')) 143 | 144 | @property 145 | def languagesEnglish(self): 146 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}language', 147 | '{http://www.w3.org/XML/1998/namespace}lang', 148 | 'en')) 149 | 150 | @property 151 | def languagesFrench(self): 152 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}language', 153 | '{http://www.w3.org/XML/1998/namespace}lang', 154 | 'fr')) 155 | 156 | @property 157 | def languagesISO639(self): 158 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}language', 159 | '{http://www.w3.org/2001/XMLSchema-instance}type', 160 | 'dcterms:ISO639-2')) 161 | 162 | @property 163 | def dates(self): 164 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}date', 165 | '', 166 | '')) 167 | 168 | @property 169 | def extents(self): 170 | return(self.getElementText('{http://purl.org/dc/terms/}extent', 171 | '', 172 | '')) 173 | 174 | @property 175 | def creators(self): 176 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}creator', 177 | '', 178 | '')) 179 | 180 | @property 181 | def contributors(self): 182 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}contributor', 183 | '', 184 | '')) 185 | 186 | @property 187 | def titles(self): 188 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}title', 189 | '', 190 | '')) 191 | 192 | @property 193 | def titlesMain(self): 194 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}title', 195 | '{http://www.w3.org/2001/XMLSchema-instance}type', 196 | 'dcx:maintitle')) 197 | 198 | @property 199 | def titlesIntermediate(self): 200 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}title', 201 | '{http://www.w3.org/2001/XMLSchema-instance}type', 202 | 'dcx:intermediatetitle')) 203 | 204 | @property 205 | def publishers(self): 206 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}publisher', 207 | '', 208 | '')) 209 | 210 | @property 211 | def countries(self): 212 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}country', 213 | '', 214 | '')) 215 | 216 | @property 217 | def subjectsBrinkman(self): 218 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}subject', 219 | '{http://www.w3.org/2001/XMLSchema-instance}type', 220 | 'dcx:Brinkman')) 221 | 222 | @property 223 | def subjectsISO9707(self): 224 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}subject', 225 | '{http://www.w3.org/2001/XMLSchema-instance}type', 226 | 'ISO_9707_[Brinkman]')) 227 | 228 | @property 229 | def subjectsUNESCO(self): 230 | return(self.getElementText('{http://purl.org/dc/elements/1.1/}subject', 231 | '{http://www.w3.org/2001/XMLSchema-instance}type', 232 | 'UNESCO')) 233 | 234 | @property 235 | def collectionIdentifiers(self): 236 | return(self.getElementText('{http://purl.org/dc/terms/}isPartOf', 237 | '{http://www.w3.org/2001/XMLSchema-instance}type', 238 | 'dcx:collectionIdentifier')) 239 | 240 | @property 241 | def recordIdentifiersURI(self): 242 | return(self.getElementText('{http://krait.kb.nl/coop/tel/handbook/telterms.html}recordIdentifier', 243 | '{http://www.w3.org/2001/XMLSchema-instance}type', 244 | 'dcterms:URI')) 245 | 246 | @property 247 | def annotations(self): 248 | # Note that annotations sometimes contain language or itenID attibutes; 249 | # ignored for now (collect everything). 250 | return(self.getElementText('{http://krait.kb.nl/coop/tel/handbook/telterms.html}annotation', 251 | '', 252 | '')) 253 | 254 | 255 | class record(): 256 | def __init__(self, record_data, sru): 257 | self.record_data = record_data 258 | self.sru = sru 259 | 260 | def __iter__(self): 261 | return self 262 | 263 | # This works under Python 2.7 264 | def next(self): 265 | if self.sru.nr_of_records == 0: 266 | raise StopIteration 267 | if self.sru.startrecord < self.sru.nr_of_records + 1: 268 | record_data = self.sru.run_query() 269 | self.sru.startrecord += 1 270 | return response(record_data, self.sru) 271 | else: 272 | raise StopIteration 273 | 274 | # This works under Python 3 275 | def __next__(self): 276 | if self.sru.nr_of_records == 0: 277 | raise StopIteration 278 | if self.sru.startrecord < self.sru.nr_of_records + 1: 279 | record_data = self.sru.run_query() 280 | self.sru.startrecord += 1 281 | return response(record_data, self.sru) 282 | else: 283 | raise StopIteration 284 | 285 | 286 | class sru(): 287 | DEBUG = False 288 | 289 | collection = False 290 | maximumrecords = 50 291 | nr_of_records = 0 292 | query = "" 293 | recordschema = False 294 | sru_collections = SETS 295 | startrecord = 0 296 | 297 | def search(self, query, collection=False, 298 | startrecord=1, maximumrecords=1, recordschema=False): 299 | 300 | self.maximumrecords = maximumrecords 301 | if sys.version.startswith('3'): 302 | self.query = urllib.parse.quote_plus(query) 303 | elif sys.version.startswith('2'): 304 | self.query = urllib.quote_plus(query) 305 | self.startrecord = startrecord 306 | 307 | if collection not in self.sru_collections: 308 | raise Exception('Unknown collection') 309 | 310 | self.collection = self.sru_collections[collection]['collection'] 311 | 312 | if not self.collection: 313 | raise Exception('Error, no collection specified') 314 | 315 | if not recordschema: 316 | self.recordschema = self.sru_collections[collection]['recordschema'] 317 | else: 318 | self.recordschema = recordschema 319 | 320 | record_data = self.run_query() 321 | 322 | nr_of_records = [i.text for i in record_data.iter() if 323 | i.tag.endswith('numberOfRecords')][0] 324 | 325 | self.nr_of_records = int(nr_of_records) 326 | 327 | if self.nr_of_records > 0: 328 | return response(record_data, self) 329 | 330 | return False 331 | 332 | def run_query(self): 333 | url = SRU_BASEURL % (self.maximumrecords, self.startrecord, 334 | self.recordschema, self.collection, self.query) 335 | if self.DEBUG: 336 | sys.stdout.write(url) 337 | 338 | r = requests.get(url) 339 | 340 | if not r.status_code == 200: 341 | raise Exception('Error while getting data from %s' % url) 342 | 343 | record_data = etree.fromstring(r.content) 344 | 345 | return record_data 346 | -------------------------------------------------------------------------------- /ipmlab/ipmlab.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """ 3 | Script for automated imaging of poertable media such as 4 | floppy discs and usb thumbdrives 5 | 6 | Author: Johan van der Knijff 7 | Research department, KB / National Library of the Netherlands 8 | 9 | """ 10 | 11 | import sys 12 | import os 13 | import csv 14 | import time 15 | import xml.etree.ElementTree as ETree 16 | import threading 17 | import uuid 18 | import logging 19 | import queue 20 | import tkinter as tk 21 | from tkinter import filedialog as tkFileDialog 22 | from tkinter import scrolledtext as ScrolledText 23 | from tkinter import messagebox as tkMessageBox 24 | from tkinter import ttk 25 | from . import config 26 | from .kbapi import sru 27 | from .socketserver import server 28 | from . import pmworker 29 | 30 | __version__ = '0.4.0' 31 | config.version = __version__ 32 | 33 | class carrierEntry(tk.Frame): 34 | 35 | """This class defines the graphical user interface + associated functions 36 | for associated actions 37 | """ 38 | 39 | def __init__(self, parent, *args, **kwargs): 40 | """Initiate class""" 41 | tk.Frame.__init__(self, parent, *args, **kwargs) 42 | self.root = parent 43 | # Logging stuff 44 | self.logger = logging.getLogger() 45 | # Create a logging handler using a queue 46 | self.log_queue = queue.Queue(-1) 47 | self.queue_handler = QueueHandler(self.log_queue) 48 | config.readyToStart = False 49 | self.catidOld = "" 50 | self.titleOld = "" 51 | self.volumeNoOld = "" 52 | self.carrierNumber = 0 53 | self.t1 = None 54 | self.t2 = None 55 | self.build_gui() 56 | 57 | def on_quit(self, event=None): 58 | """Wait until the medium that is currently being pocessed has 59 | finished, and quit (batch can be resumed by opening it in the File dialog) 60 | """ 61 | config.quitFlag = True 62 | self.bQuit.config(state='disabled') 63 | if config.batchIsOpen: 64 | msg = 'User pressed Quit, quitting after current medium has been processed' 65 | tkMessageBox.showinfo("Info", msg) 66 | 67 | msg = 'User pressed Quit, click OK to close ipmlab' 68 | 69 | if not config.readyToStart: 70 | # User hasn't yet created or opened a batch 71 | time.sleep(2) 72 | tkMessageBox.showinfo("Quit", msg) 73 | os._exit(0) 74 | else: 75 | # User has created or opened a batch 76 | # Wait until processingMedium flag is reset to False 77 | while config.processingMedium: 78 | time.sleep(2) 79 | # Wait 1 more second to avoid race condition 80 | time.sleep(2) 81 | 82 | handlers = self.logger.handlers[:] 83 | 84 | for handler in handlers: 85 | handler.close() 86 | self.logger.removeHandler(handler) 87 | 88 | msg = 'User pressed Quit, click OK to close ipmlab' 89 | tkMessageBox.showinfo("Quit", msg) 90 | os._exit(0) 91 | 92 | 93 | def on_create(self, event=None): 94 | """Create new batch in rootDir""" 95 | 96 | # Create unique batch identifier (UUID, based on host ID and current time) 97 | # this ensures that if the software is run in parallel on different machines 98 | # the batch identifiers will always be unique 99 | batchID = str(uuid.uuid1()) 100 | 101 | # Construct batch name 102 | batchName = config.prefixBatch + '-' + batchID 103 | config.batchFolder = os.path.join(config.rootDir, batchName) 104 | try: 105 | os.makedirs(config.batchFolder) 106 | except IOError: 107 | msg = 'Cannot create batch folder ' + config.batchFolder 108 | tkMessageBox.showerror("Error", msg) 109 | 110 | # Write Ipmlab version to file in batch 111 | versionFile = os.path.join(config.batchFolder, 'version.txt') 112 | with open(versionFile, "w") as vf: 113 | vf.write(config.version + '\n') 114 | 115 | # Define batch manifest (CSV file with minimal metadata on each carrier) 116 | config.batchManifest = os.path.join(config.batchFolder, 'manifest.csv') 117 | 118 | # Write header row if batch manifest doesn't exist already 119 | if not os.path.isfile(config.batchManifest): 120 | headerBatchManifest = (['jobID', 121 | 'PPN', 122 | 'volumeNo', 123 | 'title', 124 | 'success', 125 | 'readErrors']) 126 | 127 | # Open batch manifest in append mode 128 | bm = open(config.batchManifest, "a", encoding="utf-8") 129 | 130 | # Create CSV writer object 131 | csvBm = csv.writer(bm, lineterminator='\n') 132 | 133 | # Write header to batch manifest and close file 134 | csvBm.writerow(headerBatchManifest) 135 | bm.close() 136 | 137 | # Set up logging 138 | successLogger = True 139 | 140 | try: 141 | self.setupLogger() 142 | # Start polling log messages from the queue 143 | self.after(100, self.poll_log_queue) 144 | except OSError: 145 | # Something went wrong while trying to write to lof file 146 | msg = ('error trying to write log file') 147 | tkMessageBox.showerror("ERROR", msg) 148 | successLogger = False 149 | 150 | if successLogger: 151 | # Notify user 152 | msg = 'Created batch ' + batchName 153 | tkMessageBox.showinfo("Created batch", msg) 154 | logging.info(''.join(['batchFolder set to ', config.batchFolder])) 155 | 156 | # Update state of buttons / widgets 157 | self.bNew.config(state='disabled') 158 | self.bOpen.config(state='disabled') 159 | self.bFinalise.config(state='normal') 160 | if config.enablePPNLookup: 161 | self.catid_entry.config(state='normal') 162 | self.usepreviousPPN_button.config(state='normal') 163 | else: 164 | self.title_entry.config(state='normal') 165 | self.usepreviousTitle_button.config(state='normal') 166 | self.volumeNo_entry.config(state='normal') 167 | self.volumeNo_entry.delete(0, tk.END) 168 | self.volumeNo_entry.insert(tk.END, "1") 169 | self.submit_button.config(state='normal') 170 | 171 | # Flag that is True if batch is open 172 | config.batchIsOpen = True 173 | # Set readyToStart flag to True, 174 | config.readyToStart = True 175 | 176 | 177 | def on_open(self, event=None): 178 | """Open existing batch""" 179 | 180 | # defining options for opening a directory 181 | self.dir_opt = options = {} 182 | options['initialdir'] = config.rootDir 183 | options['mustexist'] = True 184 | options['parent'] = self.root 185 | options['title'] = 'Select batch directory' 186 | config.batchFolder = tkFileDialog.askdirectory(**self.dir_opt) 187 | config.batchManifest = os.path.join(config.batchFolder, 'manifest.csv') 188 | 189 | # Check if batch was already finalized, and exit if so 190 | print(os.path.join(config.batchFolder, 'eob.txt')) 191 | if os.path.isfile(os.path.join(config.batchFolder, 'eob.txt')): 192 | msg = 'cannot open finalized batch' 193 | tkMessageBox.showerror("Error", msg) 194 | else: 195 | 196 | # Set up logging 197 | successLogger = True 198 | 199 | try: 200 | self.setupLogger() 201 | # Start polling log messages from the queue 202 | self.after(100, self.poll_log_queue) 203 | except OSError: 204 | # Something went wrong while trying to write to lof file 205 | msg = ('error trying to write log file') 206 | tkMessageBox.showerror("ERROR", msg) 207 | successLogger = False 208 | 209 | if successLogger: 210 | logging.info(''.join(['*** Opening existing batch ', config.batchFolder, ' ***'])) 211 | 212 | if config.batchFolder != '': 213 | 214 | # Update state of buttons /widgets, taking into account whether batch was 215 | # finalized by user 216 | self.bNew.config(state='disabled') 217 | self.bOpen.config(state='disabled') 218 | self.submit_button.config(state='normal') 219 | self.bFinalise.config(state='normal') 220 | if config.enablePPNLookup: 221 | self.catid_entry.config(state='normal') 222 | self.usepreviousPPN_button.config(state='normal') 223 | else: 224 | self.title_entry.config(state='normal') 225 | self.usepreviousTitle_button.config(state='normal') 226 | self.volumeNo_entry.config(state='normal') 227 | self.volumeNo_entry.delete(0, tk.END) 228 | self.volumeNo_entry.insert(tk.END, "1") 229 | 230 | # Flag that is True if batch is open 231 | config.batchIsOpen = True 232 | # Set readyToStart flag to True 233 | config.readyToStart = True 234 | 235 | def on_finalise(self, event=None): 236 | """Finalise batch after user pressed finalise button""" 237 | msg = ("This will finalise the current batch.\n After finalising no further " 238 | "media can be added. Are you sure you want to do this?") 239 | if tkMessageBox.askyesno("Confirm", msg): 240 | jobFile = 'eob.txt' 241 | fJob = open(os.path.join(config.batchFolder, jobFile), "w", encoding="utf-8") 242 | lineOut = 'EOB\n' 243 | fJob.write(lineOut) 244 | fJob.close() 245 | 246 | self.bFinalise.config(state='disabled') 247 | self.submit_button.config(state='disabled') 248 | if config.enablePPNLookup: 249 | self.catid_entry.config(state='disabled') 250 | self.usepreviousPPN_button.config(state='disabled') 251 | else: 252 | self.title_entry.config(state='disabled') 253 | self.usepreviousTitle_button.config(state='disabled') 254 | self.volumeNo_entry.delete(0, tk.END) 255 | self.volumeNo_entry.config(state='disabled') 256 | 257 | # Wait until processingMedium flag is reset to False 258 | while config.processingMedium: 259 | time.sleep(2) 260 | 261 | config.readyToStart = False 262 | config.finishedBatch = True 263 | 264 | handlers = self.logger.handlers[:] 265 | for handler in handlers: 266 | handler.close() 267 | self.logger.removeHandler(handler) 268 | # Notify user 269 | msg = 'Finished processing this batch' 270 | tkMessageBox.showinfo("Finished", msg) 271 | # Reset the GUI 272 | self.reset_gui() 273 | 274 | def on_usepreviousPPN(self, event=None): 275 | """Add previously entered PPN to entry field""" 276 | if self.catidOld == "": 277 | msg = "Previous PPN is not defined" 278 | tkMessageBox.showerror("PPN not defined", msg) 279 | else: 280 | self.catid_entry.delete(0, tk.END) 281 | self.catid_entry.insert(tk.END, self.catidOld) 282 | if self.volumeNoOld != "": 283 | # Increase volume number value 284 | volumeNoNew = str(int(self.volumeNoOld) + 1) 285 | self.volumeNo_entry.delete(0, tk.END) 286 | self.volumeNo_entry.insert(tk.END, volumeNoNew) 287 | 288 | def on_usepreviousTitle(self, event=None): 289 | """Add previously entered title to title field""" 290 | if self.titleOld == "": 291 | msg = "Previous title is not defined" 292 | tkMessageBox.showerror("Tile not defined", msg) 293 | else: 294 | self.title_entry.delete(0, tk.END) 295 | self.title_entry.insert(tk.END, self.titleOld) 296 | if self.volumeNoOld != "": 297 | volumeNoNew = str(int(self.volumeNoOld) + 1) 298 | self.volumeNo_entry.delete(0, tk.END) 299 | self.volumeNo_entry.insert(tk.END, volumeNoNew) 300 | 301 | def on_submit(self, event=None): 302 | """Process one record and add it to the queue after user pressed submit button""" 303 | 304 | config.processingMedium = True 305 | mediumLoaded = False 306 | 307 | # Fetch entered values (strip any leading / tralue whitespace characters) 308 | if config.enablePPNLookup: 309 | catid = self.catid_entry.get().strip() 310 | self.catidOld = catid 311 | else: 312 | catid = "" 313 | title = self.title_entry.get().strip() 314 | self.titleOld = title 315 | volumeNo = self.volumeNo_entry.get().strip() 316 | self.volumeNoOld = volumeNo 317 | 318 | if config.enablePPNLookup: 319 | # Check for empty string 320 | if str(catid) == '': 321 | noGGCRecords = 0 322 | else: 323 | # Lookup catalog identifier 324 | sruSearchString = 'OaiPmhIdentifier="GGC:AC:' + str(catid) + '"' 325 | response = sru.search(sruSearchString, "GGC") 326 | 327 | if not response: 328 | noGGCRecords = 0 329 | else: 330 | noGGCRecords = response.sru.nr_of_records 331 | else: 332 | noGGCRecords = 1 333 | 334 | if not config.batchIsOpen: 335 | msg = "You must first create a batch or open an existing batch" 336 | tkMessageBox.showerror("Not ready", msg) 337 | elif not representsInt(volumeNo): 338 | msg = "Volume number must be integer value" 339 | tkMessageBox.showerror("Type mismatch", msg) 340 | elif int(volumeNo) < 1: 341 | msg = "Volume number must be greater than or equal to 1" 342 | tkMessageBox.showerror("Value error", msg) 343 | elif noGGCRecords == 0: 344 | # No matching record found 345 | msg = ("Search for PPN=" + str(catid) + " returned " + 346 | "no matching record in catalog!") 347 | tkMessageBox.showerror("PPN not found", msg) 348 | else: 349 | if config.enablePPNLookup: 350 | # Matching record found. Display title and ask for confirmation 351 | record = next(response.records) 352 | 353 | # Title can be in either in: 354 | # 1. title element 355 | # 2. title element with maintitle attribute 356 | # 3. title element with intermediatetitle attribute (3 in combination with 2) 357 | 358 | titlesMain = record.titlesMain 359 | titlesIntermediate = record.titlesIntermediate 360 | titles = record.titles 361 | 362 | if titlesMain != []: 363 | title = titlesMain[0] 364 | if titlesIntermediate != []: 365 | title = title + ", " + titlesIntermediate[0] 366 | else: 367 | title = titles[0] 368 | 369 | msg = "Found title:\n\n'" + title + "'.\n\n Is this correct?" 370 | if tkMessageBox.askyesno("Confirm", msg): 371 | msg = ("Please load medium ('" + title + "', volume " + str(volumeNo) + 372 | ") and press 'OK'") 373 | tkMessageBox.showinfo("Load medium", msg) 374 | 375 | while not mediumLoaded: 376 | try: 377 | fd= os.open(config.inDevice , os.O_RDONLY) 378 | os.close(fd) 379 | mediumLoaded = True 380 | except(PermissionError, OSError): 381 | msg = ("No medium found, please load medium and press 'OK'") 382 | tkMessageBox.showinfo("Load medium", msg) 383 | 384 | # Create unique identifier for this job (UUID, based on host ID and current time) 385 | jobID = str(uuid.uuid1()) 386 | 387 | # Update carrierNumber (only used to indicate order of all media in batch in widget) 388 | self.carrierNumber += 1 389 | 390 | # Set up dictionary that holds carrier data 391 | carrierData = {} 392 | carrierData['jobID'] = jobID 393 | carrierData['PPN'] = catid 394 | carrierData['title'] = title 395 | carrierData['volumeNo'] = volumeNo 396 | 397 | # Display PPN/Title + Volume number in treeview widget 398 | self.tv.insert('', 0, text=str(self.carrierNumber), values=(catid, title, volumeNo)) 399 | 400 | if config.enablePPNLookup: 401 | self.catid_entry.config(state='disabled') 402 | self.usepreviousPPN_button.config(state='disabled') 403 | else: 404 | self.title_entry.config(state='disabled') 405 | self.usepreviousTitle_button.config(state='disabled') 406 | 407 | self.volumeNo_entry.config(state='disabled') 408 | self.submit_button.config(state='disabled') 409 | 410 | # Process carrier in separate thread 411 | self.t1 = threading.Thread(target=pmworker.processMedium, args=[carrierData]) 412 | self.t1.start() 413 | 414 | else: 415 | # Clear entry fields 416 | if config.enablePPNLookup: 417 | self.catid_entry.delete(0, tk.END) 418 | else: 419 | self.title_entry.delete(0, tk.END) 420 | 421 | 422 | def setupLogger(self): 423 | """Set up logging-related settings""" 424 | logFile = os.path.join(config.batchFolder, 'batch.log') 425 | 426 | logging.basicConfig(handlers=[logging.FileHandler(logFile, 'a', 'utf-8')], 427 | level=logging.INFO, 428 | format='%(asctime)s - %(levelname)s - %(message)s') 429 | 430 | # Add the handler to logger 431 | self.logger = logging.getLogger() 432 | # This sets the console output format (slightly different from basicConfig!) 433 | formatter = logging.Formatter('%(levelname)s: %(message)s') 434 | self.queue_handler.setFormatter(formatter) 435 | self.logger.addHandler(self.queue_handler) 436 | 437 | 438 | def display(self, record): 439 | """Display log record in scrolledText widget""" 440 | msg = self.queue_handler.format(record) 441 | self.st.configure(state='normal') 442 | self.st.insert(tk.END, msg + '\n', record.levelname) 443 | self.st.configure(state='disabled') 444 | # Autoscroll to the bottom 445 | self.st.yview(tk.END) 446 | 447 | 448 | def poll_log_queue(self): 449 | """Check every 100ms if there is a new message in the queue to display""" 450 | while True: 451 | try: 452 | record = self.log_queue.get(block=False) 453 | except queue.Empty: 454 | break 455 | else: 456 | self.display(record) 457 | self.after(100, self.poll_log_queue) 458 | 459 | 460 | def build_gui(self): 461 | """Build the GUI""" 462 | 463 | # Read configuration file 464 | configFileDefinedFlag, configFileExistsFlag, configFileOpenFlag, configFileParsedFlag = getConfiguration() 465 | 466 | self.root.title('ipmlab v.' + config.version) 467 | self.root.option_add('*tearOff', 'FALSE') 468 | self.grid(column=0, row=0, sticky='ew') 469 | self.grid_columnconfigure(0, weight=1, uniform='a') 470 | self.grid_columnconfigure(1, weight=1, uniform='a') 471 | self.grid_columnconfigure(2, weight=1, uniform='a') 472 | self.grid_columnconfigure(3, weight=1, uniform='a') 473 | 474 | # Set GUI geometry 475 | windowWidth = 700 476 | windowHeight = 730 477 | 478 | # get the screen dimension 479 | screenWidth = self.root.winfo_screenwidth() 480 | screenHeight = self.root.winfo_screenheight() 481 | 482 | # find the center point 483 | centerX = int(screenWidth/2 - windowWidth / 2) 484 | centerY = int(screenHeight/2 - windowHeight / 2) 485 | 486 | # set the position of the window to the center of the screen 487 | self.root.geometry(f'{windowWidth}x{windowHeight}+{centerX}+{centerY}') 488 | # Disable resize 489 | self.root.resizable(False, False) 490 | 491 | # Batch toolbar 492 | self.bNew = tk.Button(self, 493 | text="New", 494 | height=2, 495 | width=4, 496 | underline=0, 497 | command=self.on_create) 498 | self.bNew.grid(column=0, row=1, sticky='ew') 499 | self.bOpen = tk.Button(self, 500 | text="Open", 501 | height=2, 502 | width=4, 503 | underline=0, 504 | command=self.on_open) 505 | self.bOpen.grid(column=1, row=1, sticky='ew') 506 | self.bFinalise = tk.Button(self, 507 | text="Finalize", 508 | height=2, 509 | width=4, 510 | underline=0, 511 | command=self.on_finalise) 512 | self.bFinalise.grid(column=2, row=1, sticky='ew') 513 | self.bQuit = tk.Button(self, 514 | text="Quit", 515 | height=2, 516 | width=4, 517 | underline=0, 518 | command=self.on_quit) 519 | self.bQuit.grid(column=3, row=1, sticky='ew') 520 | 521 | # Disable finalise button on startup 522 | self.bFinalise.config(state='disabled') 523 | 524 | ttk.Separator(self, orient='horizontal').grid(column=0, row=2, columnspan=4, sticky='ew') 525 | 526 | # Entry elements for each carrier 527 | 528 | if config.enablePPNLookup: 529 | # Catalog ID (PPN) 530 | tk.Label(self, text='PPN').grid(column=0, row=3, sticky='w') 531 | self.catid_entry = tk.Entry(self, width=20, state='disabled') 532 | 533 | # Pressing this button adds previously entered PPN to entry field 534 | self.usepreviousPPN_button = tk.Button(self, 535 | text='Use previous', 536 | height=1, 537 | width=2, 538 | underline=0, 539 | state='disabled', 540 | command=self.on_usepreviousPPN) 541 | self.usepreviousPPN_button.grid(column=2, row=3, sticky='ew') 542 | 543 | self.catid_entry.grid(column=1, row=3, sticky='w') 544 | else: 545 | # PPN lookup disabled, so present Title entry field 546 | tk.Label(self, text='Title').grid(column=0, row=3, sticky='w') 547 | self.title_entry = tk.Entry(self, width=45, state='disabled') 548 | 549 | # Pressing this button adds previously entered title to entry field 550 | self.usepreviousTitle_button = tk.Button(self, 551 | text='Use previous', 552 | height=1, 553 | width=2, 554 | underline=0, 555 | state='disabled', 556 | command=self.on_usepreviousTitle) 557 | self.usepreviousTitle_button.grid(column=3, row=3, sticky='ew') 558 | self.title_entry.grid(column=1, row=3, sticky='w', columnspan=3) 559 | 560 | # Volume number 561 | tk.Label(self, text='Volume number').grid(column=0, row=4, sticky='w') 562 | self.volumeNo_entry = tk.Entry(self, width=5, state='disabled') 563 | 564 | self.volumeNo_entry.grid(column=1, row=4, sticky='w') 565 | 566 | ttk.Separator(self, orient='horizontal').grid(column=0, row=5, columnspan=4, sticky='ew') 567 | 568 | self.submit_button = tk.Button(self, 569 | text='Submit', 570 | height=2, 571 | width=4, 572 | underline=0, 573 | state='disabled', 574 | command=self.on_submit) 575 | self.submit_button.grid(column=1, row=6, sticky='ew') 576 | 577 | ttk.Separator(self, orient='horizontal').grid(column=0, row=7, columnspan=4, sticky='ew') 578 | 579 | # Treeview widget displays info on entered carriers 580 | self.tv = ttk.Treeview(self, height=10, 581 | columns=('PPN', 'Title', 'VolumeNo')) 582 | self.tv.heading('#0', text='Queue #') 583 | self.tv.heading('#1', text='PPN') 584 | self.tv.heading('#2', text='Title') 585 | self.tv.heading('#3', text='Volume #') 586 | self.tv.column('#0', stretch=tk.YES, width=5) 587 | self.tv.column('#1', stretch=tk.YES, width=10) 588 | self.tv.column('#2', stretch=tk.YES, width=250) 589 | self.tv.column('#3', stretch=tk.YES, width=5) 590 | self.tv.grid(column=0, row=8, sticky='ew', columnspan=4) 591 | 592 | # ScrolledText widget displays logging info 593 | self.st = ScrolledText.ScrolledText(self, state='disabled', height=15) 594 | self.st.configure(font='TkFixedFont') 595 | self.st.grid(column=0, row=10, sticky='ew', columnspan=4) 596 | 597 | # Define bindings for keyboard shortcuts: buttons 598 | self.root.bind_all('', self.on_create) 599 | self.root.bind_all('', self.on_open) 600 | self.root.bind_all('', self.on_finalise) 601 | self.root.bind_all('', self.on_quit) 602 | self.root.bind_all('', self.on_submit) 603 | 604 | # TODO keyboard shortcuts for Radiobox selections: couldn't find ANY info on how to do this! 605 | 606 | for child in self.winfo_children(): 607 | child.grid_configure(padx=5, pady=5) 608 | 609 | # Display message and exit if config file is either undefined, doesn't exist, cannot 610 | # be opened or cannot be parsed 611 | 612 | if not configFileDefinedFlag: 613 | msg = "configuration file is undefined" 614 | errorExit(msg) 615 | if not configFileExistsFlag: 616 | msg = "configuration file doesn't exist" 617 | errorExit(msg) 618 | if not configFileOpenFlag: 619 | msg = "configuration file cannot be opened" 620 | errorExit(msg) 621 | if not configFileParsedFlag: 622 | msg = "unable to parse configuration file" 623 | errorExit(msg) 624 | 625 | # Check if all needed binaries exist, and exit if not 626 | if not os.path.isfile(config.fiwalkBin): 627 | msg = "Fiwalk binary " + config.fiwalkBin + " does not exist" 628 | errorExit(msg) 629 | if config.imagingApplication == "aaru": 630 | if not os.path.isfile(config.aaruBin): 631 | msg = "Aaru binary " + config.aaruBin + " does not exist" 632 | errorExit(msg) 633 | elif config.imagingApplication == "ddrescue": 634 | if not os.path.isfile(config.ddrescueBin): 635 | msg = "Ddrescue binary " + config.ddrescueBin + " does not exist" 636 | errorExit(msg) 637 | else: 638 | msg = config.imagingApplication + " is not a recognized imagingApplication value" 639 | errorExit(msg) 640 | 641 | # Check if root dir exists, and exit if not 642 | if not os.path.isdir(config.rootDir): 643 | msg = "root directory " + config.rootDir + " does not exist" 644 | errorExit(msg) 645 | 646 | # Check if input device exists, and exit if not 647 | try: 648 | os.stat(config.inDevice) 649 | except OSError: 650 | msg = "inDevice " + config.inDevice + " does not exist" 651 | errorExit(msg) 652 | 653 | def reset_carrier(self): 654 | """Reset the carrier entry fields""" 655 | # Reset and ere-enable entry fields, and set focus on PPN / Title field 656 | 657 | if config.enablePPNLookup: 658 | self.usepreviousPPN_button.config(state='normal') 659 | self.catid_entry.config(state='normal') 660 | self.catid_entry.delete(0, tk.END) 661 | self.catid_entry.focus_set() 662 | else: 663 | self.title_entry.config(state='normal') 664 | self.usepreviousTitle_button.config(state='normal') 665 | self.title_entry.delete(0, tk.END) 666 | self.title_entry.focus_set() 667 | 668 | self.volumeNo_entry.config(state='normal') 669 | self.submit_button.config(state='normal') 670 | self.volumeNo_entry.delete(0, tk.END) 671 | self.volumeNo_entry.insert(tk.END, "1") 672 | 673 | def reset_gui(self): 674 | """Reset the GUI""" 675 | # Reset carrierNumber 676 | self.carrierNumber = 0 677 | # Clear items in treeview widget 678 | tvItems = self.tv.get_children() 679 | for item in tvItems: 680 | self.tv.delete(item) 681 | # Clear contents of ScrolledText widget 682 | # Only works if state is 'normal' 683 | self.st.config(state='normal') 684 | self.st.delete(1.0, tk.END) 685 | self.st.config(state='disabled') 686 | # Logging stuff 687 | self.logger = logging.getLogger() 688 | # Create a logging handler using a queue 689 | self.log_queue = queue.Queue(-1) 690 | self.queue_handler = QueueHandler(self.log_queue) 691 | config.readyToStart = False 692 | self.catidOld = "" 693 | self.titleOld = "" 694 | self.volumeNoOld = "" 695 | 696 | # Update state of buttons / widgets 697 | self.bNew.config(state='normal') 698 | self.bOpen.config(state='normal') 699 | self.bFinalise.config(state='disabled') 700 | self.bQuit.config(state='normal') 701 | self.submit_button.config(state='disabled') 702 | if config.enablePPNLookup: 703 | self.catid_entry.config(state='disabled') 704 | self.usepreviousPPN_button.config(state='disabled') 705 | else: 706 | self.title_entry.config(state='disabled') 707 | self.usepreviousTitle_button.config(state='disabled') 708 | self.volumeNo_entry.config(state='disabled') 709 | 710 | def handleSocketRequests(self, q): 711 | """ Update contents of PPN and Title widgets on incoming requests from socket interface 712 | """ 713 | try: 714 | message = q.get_nowait() 715 | if config.enablePPNLookup: 716 | try: 717 | catid = message 718 | self.catid_entry.delete(0, tk.END) 719 | self.catid_entry.insert(tk.END, catid) 720 | if catid == self.catidOld and catid != "": 721 | # Increase volume number value if existing catid 722 | volumeNoNew = str(int(self.volumeNoOld) + 1) 723 | self.volumeNo_entry.delete(0, tk.END) 724 | self.volumeNo_entry.insert(tk.END, volumeNoNew) 725 | except: 726 | # TODO: catch more specific errors here? 727 | pass 728 | else: 729 | try: 730 | title = message 731 | self.title_entry.delete(0, tk.END) 732 | self.title_entry.insert(tk.END, title) 733 | if title == self.titleOld and title != "": 734 | # Increase volume number value if existing catid 735 | volumeNoNew = str(int(self.volumeNoOld) + 1) 736 | self.volumeNo_entry.delete(0, tk.END) 737 | self.volumeNo_entry.insert(tk.END, volumeNoNew) 738 | except: 739 | # TODO: catch more specific errors here? 740 | pass 741 | except queue.Empty: 742 | pass 743 | 744 | class QueueHandler(logging.Handler): 745 | """Class to send logging records to a queue 746 | It can be used from different threads 747 | The ConsoleUi class polls this queue to display records in a ScrolledText widget 748 | Taken from https://github.com/beenje/tkinter-logging-text-widget/blob/master/main.py 749 | """ 750 | 751 | def __init__(self, log_queue): 752 | super().__init__() 753 | self.log_queue = log_queue 754 | 755 | def emit(self, record): 756 | self.log_queue.put(record) 757 | 758 | 759 | def representsInt(s): 760 | """Return True if s is an integer, False otherwise""" 761 | # Source: http://stackoverflow.com/a/1267145 762 | try: 763 | int(s) 764 | return True 765 | except ValueError: 766 | return False 767 | 768 | 769 | def errorExit(error): 770 | """Show error message in messagebox and then exit after userv presses OK""" 771 | tkMessageBox.showerror("Error", error) 772 | sys.exit() 773 | 774 | 775 | def findElementTextOld(elt, elementPath): 776 | """Returns element text if it exists, errorExit if it doesn't exist""" 777 | elementText = elt.findtext(elementPath) 778 | if elementText is None: 779 | msg = 'no element found at ' + elementPath 780 | errorExit(msg) 781 | else: 782 | return elementText 783 | 784 | 785 | def findElementText(elt, elementPath): 786 | """Returns element text if it exists, errorExit if it doesn't exist""" 787 | elementText = elt.findtext(elementPath) 788 | if elementText is None: 789 | elementText = "" 790 | return elementText 791 | 792 | 793 | def getConfiguration(): 794 | """ Read configuration file, make all config variables available via 795 | config.py and check that all file paths / executables exist. 796 | This assumes an non-frozen script (no Py2Exe!) 797 | """ 798 | 799 | configFileDefinedFlag = False 800 | configFileExistsFlag = False 801 | configFileOpenFlag = False 802 | configFileParsedFlag = False 803 | 804 | packageDir = os.path.dirname(os.path.abspath(__file__)) 805 | homeDir = os.path.normpath(os.path.expanduser("~")) 806 | if packageDir.startswith(homeDir): 807 | configFileUser = os.path.join(homeDir, '.config/ipmlab/config.xml') 808 | else: 809 | configFileUser = os.path.normpath('/etc/ipmlab/config.xml') 810 | configFileDefinedFlag = True 811 | 812 | # Check if user config file exists and exit if not 813 | if os.path.isfile(configFileUser): 814 | configFileExistsFlag = True 815 | 816 | # Read contents to bytes object 817 | if configFileExistsFlag: 818 | try: 819 | fConfig = open(configFileUser, "rb") 820 | configBytes = fConfig.read() 821 | fConfig.close() 822 | configFileOpenFlag = True 823 | except IOError: 824 | pass 825 | 826 | # Parse XML tree 827 | if configFileOpenFlag: 828 | try: 829 | root = ETree.fromstring(configBytes) 830 | configFileParsedFlag = True 831 | except Exception: 832 | pass 833 | 834 | if configFileParsedFlag: 835 | 836 | # Create empty element object & add config contents to it 837 | # A bit silly but allows use of findElementText in etpatch 838 | 839 | configElt = ETree.Element("bogus") 840 | configElt.append(root) 841 | 842 | config.inDevice = findElementText(configElt, './config/inDevice') 843 | config.rootDir = findElementText(configElt, './config/rootDir') 844 | config.prefixBatch = findElementText(configElt, './config/prefixBatch') 845 | config.fiwalkBin = findElementText(configElt, './config/fiwalkBin') 846 | config.imagingApplication = findElementText(configElt, './config/imagingApplication') 847 | config.aaruBin = findElementText(configElt, './config/aaruBin') 848 | config.ddrescueBin = findElementText(configElt, './config/ddrescueBin') 849 | config.blockSize = findElementText(configElt, './config/blockSize') 850 | config.retries = findElementText(configElt, './config/retries') 851 | 852 | # For below configuration variables, use default value if value cannot be 853 | # read from config file (this ensures v1 will work with old config files) 854 | try: 855 | config.socketHost = findElementText(configElt, './config/socketHost') 856 | except: 857 | pass 858 | try: 859 | config.socketPort = findElementText(configElt, './config/socketPort') 860 | except: 861 | pass 862 | try: 863 | if findElementText(configElt, './config/enablePPNLookup') == "True": 864 | config.enablePPNLookup = True 865 | else: 866 | config.enablePPNLookup = False 867 | except: 868 | pass 869 | try: 870 | if findElementText(configElt, './config/enableSocketAPI') == "True": 871 | config.enableSocketAPI = True 872 | else: 873 | config.enableSocketAPI = False 874 | except: 875 | pass 876 | 877 | # Normalise all file paths 878 | config.rootDir = os.path.normpath(config.rootDir) 879 | config.fiwalkBin = os.path.normpath(config.fiwalkBin) 880 | config.aaruBin = os.path.normpath(config.aaruBin) 881 | config.ddrescueBin = os.path.normpath(config.ddrescueBin) 882 | 883 | return configFileDefinedFlag, configFileExistsFlag, configFileOpenFlag, configFileParsedFlag 884 | 885 | 886 | def main(): 887 | """Main function""" 888 | config.version = __version__ 889 | root = tk.Tk() 890 | myCarrierEntry = carrierEntry(root) 891 | # This ensures application quits normally if user closes window 892 | root.protocol('WM_DELETE_WINDOW', myCarrierEntry.on_quit) 893 | # Start socket API as separate thread 894 | if config.enableSocketAPI: 895 | q = queue.Queue() 896 | myServer = server() 897 | myCarrierEntry.t2 = threading.Thread(target=server.start, 898 | args=[myServer, 899 | config.socketHost, 900 | config.socketPort, 901 | q]) 902 | myCarrierEntry.t2.start() 903 | 904 | while True: 905 | if config.enableSocketAPI: 906 | myCarrierEntry.handleSocketRequests(q) 907 | root.update_idletasks() 908 | root.update() 909 | time.sleep(0.1) 910 | if config.finishedMedium: 911 | myCarrierEntry.t1.join() 912 | # Prompt operator to remove medium 913 | msg = ("Please remove the medium, then press 'OK'") 914 | tkMessageBox.showinfo("Remove medium", msg) 915 | myCarrierEntry.reset_carrier() 916 | config.processingMedium = False 917 | config.finishedMedium = False 918 | 919 | 920 | if __name__ == "__main__": 921 | main() 922 | --------------------------------------------------------------------------------