├── a3m ├── fpr │ ├── __init__.py │ └── migrations │ │ └── __init__.py ├── settings │ ├── __init__.py │ └── test.py ├── main │ ├── management │ │ └── __init__.py │ ├── migrations │ │ ├── __init__.py │ │ └── 0002_initial_data.py │ ├── apps.py │ └── __init__.py ├── client │ ├── clientScripts │ │ ├── __init__.py │ │ ├── failed_transfer_cleanup.py │ │ ├── cmd_cp.py │ │ ├── cmd_chmod.py │ │ ├── cmd_mkdir.py │ │ ├── cmd_test.py │ │ ├── cmd_tree.py │ │ ├── failed_sip_cleanup.py │ │ ├── copy_submission_docs.py │ │ ├── manual_normalization_identify_files_included.py │ │ ├── remove_directories.py │ │ ├── examine_contents.py │ │ ├── check_transfer_directory_for_objects.py │ │ ├── save_dublin_core.py │ │ ├── check_for_submission_documentation.py │ │ ├── manual_normalization_check_for_manual_normalization_directory.py │ │ ├── has_packages.py │ │ ├── load_dublin_core.py │ │ ├── verify_mets.py │ │ ├── remove_unneeded_files.py │ │ ├── remove_hidden_files_and_directories.py │ │ ├── a3m_store_aip.py │ │ ├── move_transfer.py │ │ ├── remove_files_without_premis_metadata.py │ │ ├── load_labels_from_csv.py │ │ ├── create_transfer_metadata.py │ │ ├── move_sip.py │ │ ├── restructure_for_compliance.py │ │ ├── copy_transfer_submission_documentation.py │ │ ├── manual_normalization_remove_mn_directories.py │ │ ├── store_file_modification_dates.py │ │ ├── verify_transfer_compliance.py │ │ ├── verify_sip_compliance.py │ │ └── change_names.py │ ├── __init__.py │ └── assets │ │ ├── catalog │ │ └── catalog.xml │ │ └── mets │ │ └── xlink.xsd ├── cli │ ├── server │ │ ├── __init__.py │ │ └── __main__.py │ ├── client │ │ ├── __init__.py │ │ └── wrapper.py │ ├── __init__.py │ └── common.py ├── server │ ├── rpc │ │ ├── __init__.py │ │ └── client.py │ ├── tasks │ │ ├── __init__.py │ │ └── backends │ │ │ ├── __init__.py │ │ │ └── base.py │ ├── processing.py │ ├── jobs │ │ ├── __init__.py │ │ ├── decisions.py │ │ └── base.py │ ├── shared_dirs.py │ ├── __init__.py │ └── translation.py ├── api │ └── 
transferservice │ │ └── v1beta1 │ │ ├── request_response_pb2_grpc.py │ │ ├── service_pb2.pyi │ │ ├── __init__.py │ │ └── service_pb2.py ├── bag.py ├── __init__.py ├── namespaces.py └── common_metrics.py ├── .python-version ├── .codecov.yml ├── docs ├── changelog.rst ├── _static │ └── custom.css ├── index.rst ├── Makefile ├── conf.py ├── settings.rst ├── installation.rst ├── development.rst ├── usage.rst └── contributing.rst ├── buf.lock ├── tests ├── client │ ├── fixtures │ │ ├── create_aip_mets │ │ │ └── objects │ │ │ │ ├── evelyn_s_photo.jpg │ │ │ │ └── evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif │ │ ├── emptysip │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── transfers │ │ │ │ └── .gitignore │ │ ├── new_preservation_file │ │ │ └── objects │ │ │ │ ├── evelyn_s_photo.jpg │ │ │ │ └── evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif │ │ ├── empty_metadata_files │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── archivesspaceids.csv │ │ ├── metadata_file_in_subdir_sip │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── foo │ │ │ │ └── foo.txt │ │ ├── custom_structmaps │ │ │ ├── custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929 │ │ │ │ └── objects │ │ │ │ │ ├── test_file.flac │ │ │ │ │ ├── test_file.jpg │ │ │ │ │ ├── test_file.mp3 │ │ │ │ │ ├── test_file.png │ │ │ │ │ ├── página_de_prueba.jpg │ │ │ │ │ ├── página_de_prueba.png │ │ │ │ │ ├── duplicate_file_name.png │ │ │ │ │ ├── nested_dir │ │ │ │ │ ├── nested_file.rdata │ │ │ │ │ └── duplicate_file_name.png │ │ │ │ │ ├── dir-with-dashes │ │ │ │ │ └── file with spaces.bin │ │ │ │ │ └── metadata │ │ │ │ │ └── transfers │ │ │ │ │ └── custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51 │ │ │ │ │ ├── path_with_spaces_structmap.xml │ │ │ │ │ ├── mets_structmap.xml │ │ │ │ │ ├── broken_structmap.xml │ │ │ │ │ ├── simple_book_structmap.xml │ │ │ │ │ ├── complex_book_structmap.xml │ │ │ │ │ ├── no-contentids.xml │ │ │ │ │ ├── unicode_simple_book_structmap.xml │ │ │ │ │ ├── empty_filenames.xml │ │ │ │ 
│ ├── file_does_not_exist.xml │ │ │ │ │ ├── mets_area_structmap.xml │ │ │ │ │ ├── nested_file_structmap.xml │ │ │ │ │ └── missing_contentid.xml │ │ │ └── model │ │ │ │ └── sip.json │ │ ├── archivesspaceid_sip │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── archivesspaceids.csv │ │ ├── metadata_csv_directories │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── metadata.csv │ │ ├── metadata_csv_nondc │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── metadata.csv │ │ ├── metadata_csv_sip │ │ │ └── objects │ │ │ │ └── metadata │ │ │ │ └── metadata.csv │ │ ├── sip.json │ │ ├── rights-unicode-filepath.csv │ │ ├── transfer.json │ │ ├── rights.csv │ │ ├── agents.json │ │ ├── microservice_agents │ │ │ └── microservice_agents.json │ │ ├── files-transfer.json │ │ ├── dublincore.json │ │ ├── events-transfer.json │ │ └── files-transfer-unicode.json │ ├── __init__.py │ ├── test_job.py │ ├── test_create_mets_v2.py │ ├── test_client.py │ ├── test_identify_file_format.py │ ├── test_has_packages.py │ ├── test_validate_file.py │ └── test_store_file_modification.py ├── conftest.py ├── server │ ├── test_translation.py │ ├── test_backend.py │ └── test_workflow.py ├── common │ ├── fixtures │ │ ├── agents.json │ │ ├── test-identifiers-MODS-METS.xml │ │ └── test_find_by_id_refid.yaml │ ├── test_execute_functions.py │ └── test_env_configparser.py └── test_registry.py ├── changelog.d └── README.txt ├── .gitattributes ├── .github ├── codeql │ └── codeql-config.yml ├── dependabot.yml ├── workflows │ ├── buf-pull-request.yml │ ├── main.yml │ ├── buf-push.yml │ ├── release.yml │ └── tests.yml └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── MANIFEST.in ├── COPYRIGHT ├── buf.gen.yaml ├── compose.yml ├── manage.py ├── .dockerignore ├── .readthedocs.yaml ├── buf.yaml ├── TRADEMARK ├── examples ├── Dockerfile └── webapp.py ├── .gitignore ├── proto └── a3m │ └── api │ └── transferservice │ └── v1beta1 │ ├── service.proto │ └── request_response.proto ├── .pre-commit-config.yaml 
├── test.sh ├── README.rst ├── CONTRIBUTING.md └── Makefile /a3m/fpr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12.6 2 | -------------------------------------------------------------------------------- /a3m/settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /a3m/fpr/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /a3m/main/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /a3m/main/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "a3m/api" 3 | -------------------------------------------------------------------------------- /a3m/cli/server/__init__.py: -------------------------------------------------------------------------------- 1 | """a3m server.""" 2 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /buf.lock: -------------------------------------------------------------------------------- 1 | # Generated by buf. DO NOT EDIT. 2 | version: v2 3 | -------------------------------------------------------------------------------- /tests/client/fixtures/create_aip_mets/objects/evelyn_s_photo.jpg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/emptysip/objects/metadata/transfers/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/new_preservation_file/objects/evelyn_s_photo.jpg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/empty_metadata_files/objects/metadata/archivesspaceids.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /a3m/server/rpc/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import Client 2 | 3 | __all__ = ["Client"] 4 | -------------------------------------------------------------------------------- /tests/client/fixtures/metadata_file_in_subdir_sip/objects/metadata/foo/foo.txt: -------------------------------------------------------------------------------- 1 | foo 2 | -------------------------------------------------------------------------------- /changelog.d/README.txt: -------------------------------------------------------------------------------- 1 | This directory will hold the changelog entries managed by scriv. 
2 | -------------------------------------------------------------------------------- /tests/client/fixtures/create_aip_mets/objects/evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/new_preservation_file/objects/evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py ident 2 | *.sh ident 3 | a3m/api/** linguist-generated 4 | docs/** linguist-documentation 5 | -------------------------------------------------------------------------------- /.github/codeql/codeql-config.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL config" 2 | paths-ignore: 3 | - docs 4 | - hack 5 | - tests 6 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/test_file.flac: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/test_file.jpg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/test_file.mp3: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/test_file.png: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/página_de_prueba.jpg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/página_de_prueba.png: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/duplicate_file_name.png: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/nested_dir/nested_file.rdata: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include COPYRIGHT 2 | include LICENSE 3 | include README.rst 4 | include TRADEMARK 5 | graft a3m 6 | global-exclude *.pyc 7 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/dir-with-dashes/file with spaces.bin: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/nested_dir/duplicate_file_name.png: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /a3m/cli/client/__init__.py: -------------------------------------------------------------------------------- 1 | """a3m command-line interface. 2 | 3 | It can embed the server or communicate with a remote instance. 4 | """ 5 | -------------------------------------------------------------------------------- /tests/client/fixtures/archivesspaceid_sip/objects/metadata/archivesspaceids.csv: -------------------------------------------------------------------------------- 1 | objects/evelyn's photo.jpg,a118514fab1b2ee6a7e9ad259e1de355 2 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | #reference .py { 2 | margin-bottom: 1em !important; 3 | } 4 | 5 | #reference .sig-param { 6 | color: #666 !important; 7 | } 8 | -------------------------------------------------------------------------------- /tests/client/fixtures/metadata_csv_directories/objects/metadata/metadata.csv: -------------------------------------------------------------------------------- 1 | filename,dc.title,dc.description 2 | objects/Landing_zone,The landing zone,A zone for landing 3 | -------------------------------------------------------------------------------- /a3m/main/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class MainAppConfig(AppConfig): 5 | default_auto_field = "django.db.models.AutoField" 6 | name 
= "a3m.main" 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | open-pull-requests-limit: 0 8 | -------------------------------------------------------------------------------- /tests/client/fixtures/metadata_csv_nondc/objects/metadata/metadata.csv: -------------------------------------------------------------------------------- 1 | filename,nondc,dc.title,custom.field,dc.description 2 | objects/evelyn's photo.jpg,Non DC metadata,Mountain Tents,A custom field,Tents on a mountain 3 | -------------------------------------------------------------------------------- /a3m/api/transferservice/v1beta1/request_response_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | 4 | import grpc 5 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright 2020 Artefactual Systems Inc. 3 | 4 | Please note, a3m also includes several third-party libraries, each with their own copyright and license terms. See http://archivematica.org/software. 
5 | 6 | -------------------------------------------------------------------------------- /tests/client/fixtures/metadata_csv_sip/objects/metadata/metadata.csv: -------------------------------------------------------------------------------- 1 | filename,dc.title,dc.description 2 | objects/evelyn's photo.jpg,Mountain Tents,Tents on a mountain 3 | objects/evelyn's third photo/evelyn's third photo.jpg,Tents,Mountains blocked by tents 4 | -------------------------------------------------------------------------------- /buf.gen.yaml: -------------------------------------------------------------------------------- 1 | version: v2 2 | managed: 3 | enabled: true 4 | plugins: 5 | - remote: buf.build/grpc/python:v1.66.1 6 | out: . 7 | - remote: buf.build/protocolbuffers/python:v28.2 8 | out: . 9 | - remote: buf.build/protocolbuffers/pyi:v28.2 10 | out: . 11 | -------------------------------------------------------------------------------- /compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | volumes: 3 | a3m-pipeline-data: 4 | name: "a3m-pipeline-data" 5 | services: 6 | a3m: 7 | build: 8 | context: "." 
9 | volumes: 10 | - ".:/a3m" 11 | - "a3m-pipeline-data:/home/a3m/.local/share/a3m:rw" 12 | ports: 13 | - "52000:7000" 14 | -------------------------------------------------------------------------------- /a3m/api/transferservice/v1beta1/service_pb2.pyi: -------------------------------------------------------------------------------- 1 | from a3m.api.transferservice.v1beta1 import ( 2 | request_response_pb2 as _request_response_pb2, 3 | ) 4 | from google.protobuf import descriptor as _descriptor 5 | from typing import ClassVar as _ClassVar 6 | 7 | DESCRIPTOR: _descriptor.FileDescriptor 8 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "a3m.settings.common") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | === 2 | a3m 3 | === 4 | 5 | Current release: |version|. 6 | 7 | .. include:: ../README.rst 8 | 9 | .. 
toctree:: 10 | :hidden: 11 | 12 | overview 13 | installation 14 | usage 15 | settings 16 | development 17 | docker 18 | contributing 19 | changelog 20 | -------------------------------------------------------------------------------- /a3m/server/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from a3m.server.tasks.backends import PoolTaskBackend 2 | from a3m.server.tasks.backends import TaskBackend 3 | from a3m.server.tasks.backends import get_task_backend 4 | from a3m.server.tasks.task import Task 5 | 6 | __all__ = ("PoolTaskBackend", "Task", "TaskBackend", "get_task_backend") 7 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ** 2 | 3 | # We need .git to not be excluded from the context (hatch-vcs). 4 | !/.git/** 5 | 6 | !/a3m/** 7 | !/.python-version 8 | !/uv.lock 9 | !/pyproject.toml 10 | !/README.rst 11 | !/LICENSE 12 | 13 | **/*~ 14 | **/*.log 15 | **/.DS_Store 16 | **/Thumbs.db 17 | **/__pycache__ 18 | **/*.pyc 19 | **/*.pyo 20 | -------------------------------------------------------------------------------- /a3m/client/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | 4 | THIS_DIR = os.path.dirname(__file__) 5 | 6 | ASSETS_DIR = os.path.join(THIS_DIR, "assets", "") 7 | 8 | # Use local XML schemas for validation. 9 | os.environ["XML_CATALOG_FILES"] = str( 10 | pathlib.Path(__file__).parent / "assets/catalog/catalog.xml" 11 | ) 12 | -------------------------------------------------------------------------------- /a3m/api/transferservice/v1beta1/__init__.py: -------------------------------------------------------------------------------- 1 | from . import request_response_pb2 2 | from . import request_response_pb2_grpc 3 | from . import service_pb2 4 | from . 
import service_pb2_grpc 5 | 6 | 7 | __all__ = [ 8 | "request_response_pb2_grpc", 9 | "request_response_pb2", 10 | "service_pb2_grpc", 11 | "service_pb2", 12 | ] 13 | -------------------------------------------------------------------------------- /a3m/main/__init__.py: -------------------------------------------------------------------------------- 1 | from django.db.backends.signals import connection_created 2 | 3 | 4 | def activate_wal_mode(sender, connection, **kwargs): 5 | if connection.vendor == "sqlite": 6 | cursor = connection.cursor() 7 | cursor.execute("PRAGMA journal_mode=WAL") 8 | 9 | 10 | connection_created.connect(activate_wal_mode) 11 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/failed_transfer_cleanup.py: -------------------------------------------------------------------------------- 1 | from a3m.client import metrics 2 | 3 | FAILED = "fail" 4 | 5 | 6 | def call(jobs): 7 | # Transfer UUID can be found in sys.args[1] but it is currently unused. 
8 | job = jobs[0] 9 | with job.JobContext(): 10 | metrics.transfer_failed(FAILED) 11 | job.set_status(0) 12 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: "ubuntu-22.04" 4 | tools: 5 | python: "3.12" 6 | commands: 7 | - pip install uv 8 | - uv sync --frozen 9 | - git fetch --unshallow 10 | - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html 11 | sphinx: 12 | configuration: "docs/conf.py" 13 | -------------------------------------------------------------------------------- /buf.yaml: -------------------------------------------------------------------------------- 1 | version: v2 2 | modules: 3 | - path: proto 4 | name: buf.build/artefactual/a3m 5 | lint: 6 | use: 7 | - STANDARD 8 | except: 9 | - FIELD_NOT_REQUIRED 10 | - PACKAGE_NO_IMPORT_CYCLE 11 | disallow_comment_ignores: true 12 | breaking: 13 | use: 14 | - FILE 15 | except: 16 | - EXTENSION_NO_DELETE 17 | - FIELD_SAME_DEFAULT 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import pytest 4 | 5 | 6 | @pytest.fixture(autouse=True) 7 | def set_xml_catalog_files(monkeypatch): 8 | """Use local XML schemas for validation.""" 9 | monkeypatch.setenv( 10 | "XML_CATALOG_FILES", 11 | str( 12 | pathlib.Path(__file__).parent.parent 13 | / "a3m/client/assets/catalog/catalog.xml" 14 | ), 15 | ) 16 | -------------------------------------------------------------------------------- /tests/client/fixtures/sip.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": "4060ee97-9c3f-4822-afaf-ebdf838284c3", 4 | "model": "main.sip", 5 | "fields": { 6 | "aip_filename": null, 7 | "currentpath": 
"%sharedPath%watchedDirectories/workFlowDecisions/metadataReminder/no-metadata-4060ee97-9c3f-4822-afaf-ebdf838284c3/", 8 | "createdtime": "2015-06-24T17:22:02Z", 9 | "hidden": false 10 | } 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /.github/workflows/buf-pull-request.yml: -------------------------------------------------------------------------------- 1 | name: Buf (pull request) 2 | on: pull_request 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: bufbuild/buf-setup-action@v1 9 | with: 10 | github_token: ${{ secrets.GITHUB_TOKEN }} 11 | - uses: bufbuild/buf-lint-action@v1 12 | with: 13 | input: proto 14 | # TODO: use buf-breaking-action when ready 15 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/model/sip.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": "3a915449-d1bb-4920-b274-c917c7bb5929", 4 | "model": "main.sip", 5 | "fields": { 6 | "aip_filename": null, 7 | "currentpath": "%sharedPath%watchedDirectories/workFlowDecisions/metadataReminder/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/", 8 | "createdtime": "2019-04-10T23:13:02Z", 9 | "hidden": false 10 | } 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/cmd_cp.py: -------------------------------------------------------------------------------- 1 | from a3m.executeOrRunSubProcess import executeOrRun 2 | 3 | 4 | def call(jobs): 5 | for job in jobs: 6 | with job.JobContext(): 7 | exit_code, std_out, std_error = executeOrRun( 8 | "command", ["cp"] + job.args[1:], capture_output=True 9 | ) 10 | 11 | job.write_error(std_error) 12 | job.write_output(std_out) 13 | job.set_status(exit_code) 14 | -------------------------------------------------------------------------------- 
/a3m/client/clientScripts/cmd_chmod.py: -------------------------------------------------------------------------------- 1 | from a3m.executeOrRunSubProcess import executeOrRun 2 | 3 | 4 | def call(jobs): 5 | for job in jobs: 6 | with job.JobContext(): 7 | exit_code, std_out, std_error = executeOrRun( 8 | "command", ["chmod"] + job.args[1:], capture_output=True 9 | ) 10 | 11 | job.write_error(std_error) 12 | job.write_output(std_out) 13 | job.set_status(exit_code) 14 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/cmd_mkdir.py: -------------------------------------------------------------------------------- 1 | from a3m.executeOrRunSubProcess import executeOrRun 2 | 3 | 4 | def call(jobs): 5 | for job in jobs: 6 | with job.JobContext(): 7 | exit_code, std_out, std_error = executeOrRun( 8 | "command", ["mkdir"] + job.args[1:], capture_output=True 9 | ) 10 | 11 | job.write_error(std_error) 12 | job.write_output(std_out) 13 | job.set_status(exit_code) 14 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/cmd_test.py: -------------------------------------------------------------------------------- 1 | from a3m.executeOrRunSubProcess import executeOrRun 2 | 3 | 4 | def call(jobs): 5 | for job in jobs: 6 | with job.JobContext(): 7 | exit_code, std_out, std_error = executeOrRun( 8 | "command", ["test"] + job.args[1:], capture_output=True 9 | ) 10 | 11 | job.write_error(std_error) 12 | job.write_output(std_out) 13 | job.set_status(exit_code) 14 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/cmd_tree.py: -------------------------------------------------------------------------------- 1 | from a3m.executeOrRunSubProcess import executeOrRun 2 | 3 | 4 | def call(jobs): 5 | for job in jobs: 6 | with job.JobContext(): 7 | exit_code, std_out, std_error = executeOrRun( 8 | "command", ["tree"] + 
job.args[1:], capture_output=True 9 | ) 10 | 11 | job.write_error(std_error) 12 | job.write_output(std_out) 13 | job.set_status(exit_code) 14 | -------------------------------------------------------------------------------- /tests/client/fixtures/rights-unicode-filepath.csv: -------------------------------------------------------------------------------- 1 | file,basis,status,determination_date,jurisdiction,start_date,end_date,terms,citation,note,grant_act,grant_restriction,grant_start_date,grant_end_date,grant_note,doc_id_type,doc_id_value,doc_id_role 2 | objects/たくさん directories/need name change/checking here/evélyn's photo.jpg,copyright,cop status,2001-01-01,cop juris,2002-02-02,2003-03-03,,,cop note,cop act,Allow,2004-04-04,2005-05-05,cop grant note,cop type,cop value,cop role 3 | -------------------------------------------------------------------------------- /a3m/bag.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from bagit import Bag 4 | from bagit import BagError 5 | 6 | 7 | def is_bag(path): 8 | """Determine whether the directory contains a BagIt package. 9 | 10 | The constructor of ``Bag`` is fast enough but we may prefer to optimize 11 | later. 12 | """ 13 | if isinstance(path, Path): 14 | path = str(path) 15 | try: 16 | Bag(path) 17 | except BagError: 18 | return False 19 | return True 20 | -------------------------------------------------------------------------------- /TRADEMARK: -------------------------------------------------------------------------------- 1 | Artefactual Systems Inc. owns all Archivematica trademarks, service marks, and graphic logos. 2 | 3 | Archivematica's LICENSE does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor except as required for reasonable and customary use in describing the origin of the Work. 
class TempDirMixin(unittest.TestCase):
    """Test case mixin that gives every test its own scratch directory.

    ``setUp`` stores the directory in ``self.tmpdir`` as a ``pathlib.Path``
    and registers ``_cleanup`` so the tree is removed when the test finishes.
    """

    def setUp(self):
        super().setUp()
        scratch = tempfile.mkdtemp()
        self.tmpdir = Path(scratch)
        self.addCleanup(self._cleanup)

    def _cleanup(self):
        # Delete the directory created in ``setUp`` together with its contents.
        target = str(self.tmpdir)
        shutil.rmtree(target)
11 | warnings.filterwarnings( 12 | "ignore", ".*obsolete", UserWarning, "google.protobuf.runtime_version" 13 | ) 14 | 15 | # Hide warning: Other threads are currently calling into gRPC, skipping fork() handlers. 16 | # TODO: investigate root issue. 17 | os.environ.setdefault("GRPC_ENABLE_FORK_SUPPORT", "false") 18 | -------------------------------------------------------------------------------- /examples/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/artefactual-labs/a3m:main 2 | 3 | COPY webapp.py /a3m 4 | 5 | USER root 6 | 7 | RUN set -ex \ 8 | && apt-get update \ 9 | && apt-get install -y --no-install-recommends libnss3-tools \ 10 | && rm -rf /var/lib/apt/lists/* \ 11 | && curl -Ls "https://github.com/FiloSottile/mkcert/releases/download/v1.4.1/mkcert-v1.4.1-linux-amd64" > /usr/bin/mkcert \ 12 | && chmod +x /usr/bin/mkcert \ 13 | && mkcert -install \ 14 | && mkcert example.com "*.example.com" example.test localhost 127.0.0.1 ::1 15 | 16 | USER a3m 17 | 18 | ENTRYPOINT ["python", "webapp.py"] 19 | -------------------------------------------------------------------------------- /.github/workflows/buf-push.yml: -------------------------------------------------------------------------------- 1 | name: Buf (push) 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: bufbuild/buf-setup-action@v1 12 | with: 13 | github_token: ${{ secrets.GITHUB_TOKEN }} 14 | - uses: bufbuild/buf-lint-action@v1.1.0 15 | with: 16 | input: proto 17 | # TODO: use buf-breaking-action when ready 18 | - uses: bufbuild/buf-push-action@v1 19 | with: 20 | input: proto 21 | buf_token: ${{ secrets.BUF_TOKEN }} 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | *.py[co] 3 | *.mo 4 | 5 | # PyPI 6 | 
/a3m.egg-info 7 | /build 8 | /dist 9 | 10 | # IDE specific 11 | *.directory 12 | .project 13 | .pydevproject 14 | .settings 15 | .vscode 16 | *.sublime-project 17 | *.sublime-workspace 18 | *.sw[po] 19 | *~ 20 | 21 | # SQLite databases 22 | .sqlite 23 | 24 | # coverage data file 25 | .coverage 26 | .coverage.* 27 | coverage.* 28 | 29 | # pytest's working directory 30 | .cache 31 | .pytest_cache 32 | junit.xml 33 | 34 | # mypy cache 35 | .mypy_cache/ 36 | 37 | # a3m data dir for Compose volume 38 | /hack/compose-volume 39 | 40 | # Sphinx 41 | _build/ 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve a3m 4 | --- 5 | 6 | **Describe the bug** 7 | A clear and concise description of what the bug is. 8 | 9 | **To Reproduce** 10 | Steps to reproduce the behavior: 11 | 1. Go to '...' 12 | 2. Click on '....' 13 | 3. Scroll down to '....' 14 | 4. See error 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 
24 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/path_with_spaces_structmap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/mets_structmap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | --- 5 | 6 | **Is your feature request related to a problem? Please describe.** 7 | 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | 12 | A clear and concise description of what you want to happen. 13 | 14 | **Describe alternatives you've considered** 15 | 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | 20 | Add any other context or screenshots about the feature request here. 
# XML namespace URIs.
dcNS = "http://purl.org/dc/elements/1.1/"
dctermsNS = "http://purl.org/dc/terms/"
fitsNS = "http://hul.harvard.edu/ois/xml/ns/fits/fits_output"
metsNS = "http://www.loc.gov/METS/"
premisNS = "http://www.loc.gov/premis/v3"
xlinkNS = "http://www.w3.org/1999/xlink"
xsiNS = "http://www.w3.org/2001/XMLSchema-instance"

# The same URIs wrapped in braces ("{uri}"), ready to be prefixed to a tag name.
dcBNS = f"{{{dcNS}}}"
dctermsBNS = f"{{{dctermsNS}}}"
metsBNS = f"{{{metsNS}}}"
premisBNS = f"{{{premisNS}}}"
xlinkBNS = f"{{{xlinkNS}}}"
xsiBNS = f"{{{xsiNS}}}"

# Prefix -> namespace URI mapping.
NSMAP = dict(
    dc=dcNS,
    dcterms=dctermsNS,
    fits=fitsNS,
    mets=metsNS,
    premis=premisNS,
    xlink=xlinkNS,
    xsi=xsiNS,
)
def shim_pkg_resources():
    """Injects a pkg_resources fake needed by bagit-python in Python 3.12.

    The underlying error is only reproducible if setuptools is not installed.

    The fake is registered in ``sys.modules`` under the name
    ``pkg_resources`` and exposes only ``get_distribution`` and
    ``DistributionNotFound``. ``get_distribution(name)`` returns an object
    whose ``version`` attribute comes from ``importlib.metadata.version``; when
    no metadata exists for ``name`` it raises ``DistributionNotFound`` so that
    callers written against the ``pkg_resources`` contract can handle it.
    """
    # Imported locally: only this function needs the name.
    from importlib.metadata import PackageNotFoundError

    class Distribution:
        def __init__(self, name):
            self.version = version(name)

    # Subclassing PackageNotFoundError keeps any handler that caught the
    # error previously leaked by the shim working unchanged.
    class FakeDistributionNotFound(PackageNotFoundError):
        pass

    def fake_get_distribution(_, name):
        # Stored as a class attribute below, so the first argument is the
        # PkgResources instance and is deliberately ignored.
        try:
            return Distribution(name)
        except PackageNotFoundError as err:
            raise FakeDistributionNotFound(name) from err

    class PkgResources:
        DistributionNotFound = FakeDistributionNotFound
        get_distribution = fake_get_distribution

    sys.modules["pkg_resources"] = PkgResources()  # type: ignore
def get_task_backend():
    """Return the backend for processing tasks.

    The ``PoolTaskBackend`` instance is created on the first call and stored
    in the module-level ``backend_global``; later calls return that same
    object.

    Raises:
        RuntimeError: if ``DEFAULT_BACKEND`` is not ``PoolTaskBackend``.
    """
    global backend_global

    if not (DEFAULT_BACKEND == PoolTaskBackend):
        raise RuntimeError("Unsupported task backend")

    if backend_global is None:
        backend_global = PoolTaskBackend()
    return backend_global
perform_policy_checks_on_originals=True, 16 | perform_policy_checks_on_preservation_derivatives=True, 17 | aip_compression_level=1, 18 | aip_compression_algorithm=transfer_service_api.request_response_pb2.ProcessingConfig.AIP_COMPRESSION_ALGORITHM_S7_COPY, 19 | ) 20 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/simple_book_structmap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/complex_book_structmap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.6.0 4 | hooks: 5 | - id: pretty-format-json 6 | args: 7 | - "--no-ensure-ascii" 8 | - "--autofix" 9 | files: | 10 | (?x)^( 11 | a3m/assets/.*\.json| 12 | tests/server/fixtures/workflow-integration-test.json 13 | ) 14 | - repo: https://github.com/PyCQA/doc8 15 | rev: v1.1.2 16 | hooks: 17 | - id: doc8 18 | files: ^docs/.*\.rst$ 19 | - repo: https://github.com/astral-sh/ruff-pre-commit 20 | rev: v0.6.7 21 | hooks: 22 | - id: ruff 23 | args: 24 | - "--fix" 25 | - "--exit-non-zero-on-fix" 26 | - id: ruff-format 27 | - repo: 
def call(jobs):
    """Copy submission documentation for every job in ``jobs``.

    For each job, ``job.args[1]`` is the SIP directory and ``job.args[2]`` the
    SIP name. The ``submissionDocumentation`` directory found under
    ``<sip_dir>/<sip_name>/data/objects`` is copied with ``cp -R`` into
    ``<sip_dir>/submissionDocumentation``, which is created first if needed.
    The command's stderr, stdout and exit code are recorded on the job.
    """
    for job in jobs:
        with job.JobContext():
            sip_dir, sip_name = job.args[1], job.args[2]

            destination = os.path.join(sip_dir, "submissionDocumentation")
            source = os.path.join(
                sip_dir, sip_name, "data", "objects", "submissionDocumentation"
            )

            os.makedirs(destination, mode=0o770, exist_ok=True)

            command = ["cp", "-R", source, destination]
            status, out, err = executeOrRun("command", command, capture_output=True)

            job.write_error(err)
            job.write_output(out)
            job.set_status(status)
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/empty_filenames.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | print_status() { 6 | echo -en "\n➡️ $1\n\n" 7 | } 8 | 9 | if ! command -v uv > /dev/null; then 10 | echo "Error: 'uv' is not installed or not in the PATH." 11 | echo "To install it, run:" 12 | echo " $ curl -LsSf https://astral.sh/uv/install.sh | sh" 13 | exit 1 14 | fi 15 | 16 | curdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 17 | 18 | print_status "Running \`ruff check\`..." 19 | uv run --frozen -- ruff check 20 | 21 | print_status "Running \`ruff format --check\`..." 22 | uv run --frozen -- ruff format --check 23 | 24 | print_status "Running \`mypy\`..." 25 | uv run --frozen -- mypy 26 | 27 | print_status "Running \`pytest\`..." 28 | uv run --frozen -- pytest \ 29 | --junitxml=junit.xml \ 30 | --override-ini=junit_family=legacy \ 31 | --cov \ 32 | --cov-append \ 33 | --cov-report xml:coverage.xml \ 34 | --cov-report html 35 | 36 | print_status "Running \`pre-commit\`..." 
def suppress_warnings():
    """Suppress SyntaxWarning.

    Hiding SyntaxWarning from users since it can be misleading. The filter is
    installed only when ``settings.DEBUG`` is off and ``sys.warnoptions`` is
    empty; otherwise nothing is changed.
    """
    if not (settings.DEBUG or sys.warnoptions):
        warnings.simplefilter("ignore", SyntaxWarning)
def configure_xml_catalog_files():
    """Use local XML schemas for validation.

    Sets the ``XML_CATALOG_FILES`` environment variable to the path of
    ``client/assets/catalog/catalog.xml``, resolved relative to the directory
    two levels above this module.
    """
    package_root = pathlib.Path(__file__).parent.parent
    catalog = package_root / "client/assets/catalog/catalog.xml"
    os.environ["XML_CATALOG_FILES"] = str(catalog)
# along with Archivematica.  If not, see <http://www.gnu.org/licenses/>.
juris3,2002-02-02,2003-03-03,,,cop note 3,cop act2,Allow,2004-04-04,2005-05-05,cop grant note3,cop type3,, 5 | objects/G31DS.TIF,license,,,,1982-01-01,1983-02-02,lic terms,,lic note,lic act,Allow,,,,license type,license value, 6 | objects/G31DS.TIF,statute,,1972-02-02,stat juris,1966-01-01,open,stat terms,stat cit,statute note,stat act,Allow,,,,statute type,statute value,statute role 7 | objects/G31DS.TIF,other,,,,1945-01-01,1950-05-05,,,other note,other act,Allow,1920-01-01,1921-01-01,other grant note,,, 8 | objects/lion.svg,donor,,,,,,,,,donor act,,,,,,, 9 | objects/lion.svg,policy,,,,,,,,,policy act,,,,,,, 10 | -------------------------------------------------------------------------------- /a3m/server/jobs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A job corresponds to a microservice, a link in the workflow, and the `Jobs` 3 | table in the database. 4 | 5 | Initialization of `Job` objects is typically done via a `JobChain`, 6 | corresponding to a chain in the workflow. The `JobChain` object handles 7 | determining the next job to be run, and passing data between jobs. 8 | 9 | The `Job` class is a base class for other job types. 
* `ClientScriptJob`, handling Jobs to be executed on MCPClient
def create():
    """Create the shared directory layout under ``settings.SHARED_DIRECTORY``.

    Each expected directory that is not already present is created with mode
    ``0o770`` (parents included) and a debug message is logged for it.
    Directories that already exist are left untouched.
    """
    relative_paths = (
        "currentlyProcessing/transfer",
        "currentlyProcessing/ingest",
        "completed",
        "failed",
        "policies",
        "tmp",
    )
    for relative_path in relative_paths:
        target = os.path.join(settings.SHARED_DIRECTORY, relative_path)
        if not os.path.isdir(target):
            logger.debug("Creating directory: %s", target)
            os.makedirs(target, mode=0o770)
	@echo "Please use \`make <target>' where <target> is one of"
# along with Archivematica.  If not, see <http://www.gnu.org/licenses/>.
4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 17 | import os 18 | import shutil 19 | 20 | 21 | def call(jobs): 22 | for job in jobs: 23 | with job.JobContext(): 24 | for directory in job.args[1:]: 25 | if os.path.isdir(directory): 26 | job.pyprint("Removing directory:", directory) 27 | shutil.rmtree(directory) 28 | else: 29 | job.pyprint("Directory does not exist:", directory) 30 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/examine_contents.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from a3m.executeOrRunSubProcess import executeOrRun 4 | 5 | 6 | def main(job, target, output): 7 | args = ["bulk_extractor", target, "-o", output, "-M", "250", "-q", "-1"] 8 | try: 9 | os.makedirs(output) 10 | 11 | _, stdout, stderr = executeOrRun("command", args, capture_output=True) 12 | 13 | job.write_output(stdout) 14 | job.write_error(stderr) 15 | 16 | # remove empty BulkExtractor logs 17 | for filename in os.listdir(output): 18 | filepath = os.path.join(output, filename) 19 | if os.path.getsize(filepath) == 0: 20 | os.remove(filepath) 21 | return 0 22 | except Exception as e: 23 | return e 24 | 25 | 26 | def call(jobs): 27 | for job in jobs: 28 | with job.JobContext(): 29 | target = job.args[1] 30 | sipdir = job.args[2] 31 | file_uuid = 
job.args[3] 32 | output = os.path.join(sipdir, "logs", "bulk-" + file_uuid) 33 | result = main(job, target, output) 34 | 35 | if isinstance(result, Exception): 36 | job.print_error(str(result)) 37 | job.set_status(1) 38 | else: 39 | job.set_status(0) 40 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | from datetime import datetime 5 | 6 | sys.path.append(os.path.abspath("../")) 7 | 8 | from a3m.cli.common import init_django 9 | 10 | init_django() 11 | 12 | needs_sphinx = "3.2" 13 | 14 | extensions = [ 15 | "sphinx.ext.autodoc", 16 | "sphinx.ext.viewcode", 17 | "sphinxcontrib.mermaid", 18 | ] 19 | 20 | autoclass_content = "both" 21 | autodoc_member_order = "bysource" 22 | source_suffix = ".rst" 23 | master_doc = "index" 24 | project = "a3m" 25 | author = "%d Artefactual Systems Inc." % datetime.now().year 26 | 27 | output = os.popen("git describe --tags --abbrev=0").read().strip() # nosec 28 | release = re.sub("^v", "", output) 29 | version = release 30 | 31 | language = "en" 32 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 33 | pygments_style = "sphinx" 34 | todo_include_todos = True 35 | 36 | html_theme = "alabaster" 37 | html_theme_options = { 38 | "description": "Lightweight Archivematica", 39 | "fixed_sidebar": True, 40 | "github_user": "artefactual-labs", 41 | "github_repo": "a3m", 42 | "github_banner": False, 43 | "github_button": False, 44 | } 45 | html_static_path = ["_static"] 46 | htmlhelp_basename = "a3mdoc" 47 | 48 | suppress_warnings = ["image.nonlocal_uri"] 49 | 50 | mermaid_version = "8.8.2" 51 | -------------------------------------------------------------------------------- /a3m/server/tasks/backends/base.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | from django.conf import settings 4 | 5 | 6 | class 
TaskBackend(metaclass=abc.ABCMeta): 7 | """Handles out of process `Task` execution.""" 8 | 9 | # The number of files we'll pack into each MCP Client job. Chosen somewhat 10 | # arbitrarily, but benchmarking with larger values (like 512) didn't make 11 | # much difference to throughput. 12 | # 13 | # Setting this too large will use more memory; setting it too small will 14 | # hurt throughput. So the trick is to set it juuuust right. 15 | TASK_BATCH_SIZE = settings.BATCH_SIZE 16 | 17 | @abc.abstractmethod 18 | def submit_task(self, job, task): 19 | """Submit a task as part of the job given, for offline processing.""" 20 | 21 | @abc.abstractmethod 22 | def wait_for_results(self, job): 23 | """Generator that yields `Task` objects related to the job given, 24 | as they are processed by the backend. 25 | 26 | This method should only be called once all tasks related to the job 27 | have been submitted, via `submit_task`. 28 | 29 | Note that task objects are not necessarily returned in the order 30 | they were submitted. 
31 | """ 32 | 33 | def shutdown(self, wait=True): 34 | """Shut down the backend.""" 35 | -------------------------------------------------------------------------------- /tests/server/test_translation.py: -------------------------------------------------------------------------------- 1 | from a3m.server.translation import UNKNOWN_TRANSLATION_LABEL 2 | from a3m.server.translation import TranslationLabel 3 | 4 | 5 | def test_translation_label(mocker): 6 | mocker.patch("a3m.server.translation.FALLBACK_LANG", "en") 7 | tr = TranslationLabel({"en": "cat", "es": "gato"}) 8 | assert repr(tr) == "TranslationLabel({'en': 'cat', 'es': 'gato'})" 9 | assert str(tr) == "cat" 10 | assert tr["es"] == "gato" 11 | assert tr["unexistent-lang-code"] == "cat" 12 | assert tr.get_label(lang="es") == "gato" 13 | assert tr.get_label(lang="is", fallback_label="köttur") == "köttur" 14 | assert tr.get_label(lang="??") == "cat" 15 | mocker.patch("a3m.server.translation.FALLBACK_LANG", "xx") 16 | assert tr.get_label(lang="yy") == UNKNOWN_TRANSLATION_LABEL 17 | 18 | 19 | def test_translation_label_with_prepared_codes(mocker): 20 | mocker.patch("a3m.server.translation.FALLBACK_LANG", "en") 21 | tr = TranslationLabel({"en": "dog", "pt_BR": "cão"}) 22 | assert tr.get_label(lang="en") == "dog" 23 | assert tr.get_label(lang="pt-br") == "cão" 24 | assert tr.get_label(lang="pt_BR") == "cão" 25 | 26 | 27 | def test_translation_label_string(mocker): 28 | mocker.patch("a3m.server.translation.FALLBACK_LANG", "en") 29 | tr = TranslationLabel("cat") 30 | assert repr(tr) == "TranslationLabel({'en': 'cat'})" 31 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |pypi| |license| |pyvers| |tests| |coverage| 2 | 3 | What is a3m? 4 | ------------ 5 | 6 | *a3m* is a lightweight version of Archivematica focused on AIP creation. 
It has 7 | neither external dependencies, integration with access systems, search 8 | capabilities nor a graphical interface. 9 | 10 | All functionality is made available as a `gRPC <https://grpc.io/>`_ service 11 | with a minimal set of methods and strongly typed messages. a3m can be executed 12 | as a standalone process or be embedded as part of your application. 13 | 14 | For more documentation, please see https://a3m.readthedocs.io. 15 | 16 | ---------- 17 | 18 | **a3m is a proof of concept. Please send us your feedback!** 19 | 20 | .. |pypi| image:: https://img.shields.io/pypi/v/a3m.svg 21 | :target: https://pypi.python.org/pypi/a3m 22 | 23 | .. |license| image:: https://img.shields.io/pypi/l/a3m.svg 24 | :target: https://github.com/artefactual-labs/a3m 25 | 26 | .. |pyvers| image:: https://img.shields.io/pypi/pyversions/a3m.svg 27 | :target: https://pypi.python.org/pypi/a3m 28 | 29 | .. |tests| image:: https://github.com/artefactual-labs/a3m/workflows/Tests/badge.svg 30 | :target: https://github.com/artefactual-labs/a3m/actions?query=workflow%3ATests 31 | 32 | ..
|coverage| image:: https://img.shields.io/codecov/c/github/artefactual-labs/a3m 33 | :target: https://codecov.io/gh/artefactual-labs/a3m 34 | -------------------------------------------------------------------------------- /tests/client/fixtures/files-transfer.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "fields": { 4 | "filegrpuuid": "", 5 | "sip": null, 6 | "checksumtype": "", 7 | "originallocation": "%transferDirectory%objects/G31DS.TIF", 8 | "transfer": "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6", 9 | "filegrpuse": "original", 10 | "removedtime": null, 11 | "label": "", 12 | "checksum": "", 13 | "enteredsystem": "2017-01-04T19:35:20Z", 14 | "modificationtime": "2017-01-04T19:35:20Z", 15 | "currentlocation": "%transferDirectory%objects/G31DS.TIF", 16 | "size": 125968 17 | }, 18 | "model": "main.file", 19 | "pk": "47813453-6872-442b-9d65-6515be3c5aa1" 20 | }, 21 | { 22 | "fields": { 23 | "filegrpuuid": "", 24 | "sip": null, 25 | "checksumtype": "", 26 | "originallocation": "%transferDirectory%objects/lion.svg", 27 | "transfer": "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6", 28 | "filegrpuse": "original", 29 | "removedtime": null, 30 | "label": "", 31 | "checksum": "", 32 | "enteredsystem": "2017-01-04T19:35:20Z", 33 | "modificationtime": "2017-01-04T19:35:20Z", 34 | "currentlocation": "%transferDirectory%objects/lion.svg", 35 | "size": 18324 36 | }, 37 | "model": "main.file", 38 | "pk": "60e5c61b-14ef-4e92-89ec-9b9201e68adb" 39 | } 40 | ] 41 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/check_transfer_directory_for_objects.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 
4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 17 | import os 18 | 19 | 20 | def call(jobs): 21 | """ 22 | Check the given directory and its subdirectories for files. 23 | Returns job status 0 if there are files. 24 | Returns job status 1 if the directories are empty. 25 | """ 26 | for job in jobs: 27 | with job.JobContext(): 28 | objects_dir = job.args[1] 29 | os.path.isdir(objects_dir) 30 | for _, _, files in os.walk(objects_dir): 31 | if files: 32 | return 33 | job.set_status(1) 34 | -------------------------------------------------------------------------------- /a3m/server/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MCPServer (Master Control Program Server) determines the jobs/tasks/microservices 3 | run by Archivematica and arranges for their execution. 4 | 5 | It makes use of the following major abstractions: 6 | * `workflow.Workflow` (and related classes `Link` and `Chain`) handle the workflow 7 | logic for Archivematica, described in workflow.json. 8 | * `jobs.base.Job` and subclasses handle execution of a single link of the workflow.
9 | * `jobs.client.ClientScriptJob` for jobs to be executed via MCPClient script 10 | * `jobs.client.DecisionJob` for workflow decision points 11 | * `jobs.chain.JobChain` handles passing context between jobs, and determining the 12 | next `Job` to be executed based on the workflow chain 13 | * `tasks.Task` corresponds to a single command to be executed by MCPClient 14 | * `tasks.backends.PoolTaskBackend` handles passing tasks to the executor 15 | (in batches) 16 | * `packages.Package` subclasses `SIP` and `Transfer` handle package 17 | related logic 18 | * a `concurrent.futures.ThreadPoolExecutor` handles out of process execution 19 | * `queues.PackageQueue` handles scheduling of `Job` objects for execution 20 | (throttled per package). The package queue is thread-safe. 21 | * `rpc_server.RPCServer` handles RPC requests from the dashboard, which arrive 22 | as gearman jobs. 23 | """ 24 | -------------------------------------------------------------------------------- /a3m/server/jobs/decisions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Jobs relating to configurable decisions. 
3 | """ 4 | 5 | import logging 6 | 7 | from a3m.server.jobs.base import Job 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class NextLinkDecisionJob(Job): 13 | """A job that determines the next link to be executed.""" 14 | 15 | def run(self, *args, **kwargs): 16 | super().run(*args, **kwargs) 17 | 18 | logger.debug("Running %s (package %s)", self.description, self.package.uuid) 19 | 20 | # Reload the package, in case the path has changed 21 | self.package.reload() 22 | self.save_to_db() 23 | 24 | self.job_chain.next_link = self.decide() 25 | 26 | return next(self.job_chain, None) 27 | 28 | def decide(self): 29 | config = self.link.config 30 | 31 | config_value = self.get_configured_value(config["config_attr"]) 32 | if config_value is None: 33 | config_value = config["default"] 34 | 35 | next_id = None 36 | for item in config["choices"]: 37 | if item["value"] == config_value: 38 | next_id = item["link_id"] 39 | break 40 | 41 | logger.debug("Using user selected link %s", next_id) 42 | self.mark_complete() 43 | 44 | return next_id 45 | 46 | def get_configured_value(self, attr_name): 47 | return getattr(self.package.config, attr_name, None) 48 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/mets_area_structmap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/save_dublin_core.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | from django.db import transaction 5 | 6 | from a3m.main import models 7 | 8 | FIELDS = ( 9 | "title", 10 
| "creator", 11 | "subject", 12 | "description", 13 | "publisher", 14 | "contributor", 15 | "date", 16 | "type", 17 | "format", 18 | "identifier", 19 | "source", 20 | "relation", 21 | "language", 22 | "coverage", 23 | "rights", 24 | ) 25 | 26 | 27 | def main(job, transfer_uuid, target_path): 28 | jsonified = {} 29 | try: 30 | dc = models.DublinCore.objects.get(metadataappliestoidentifier=transfer_uuid) 31 | except: # There may not be any DC metadata for this transfer, and that's fine 32 | job.pyprint("No DC metadata found; skipping", file=sys.stderr) 33 | return 0 34 | for field in FIELDS: 35 | attr = getattr(dc, field) 36 | if attr: 37 | jsonified[field] = attr 38 | 39 | job.pyprint("Saving the following properties to:", target_path) 40 | job.pyprint(jsonified) 41 | 42 | with open(target_path, "w") as json_file: 43 | json.dump(jsonified, json_file) 44 | return 0 45 | 46 | 47 | def call(jobs): 48 | with transaction.atomic(): 49 | for job in jobs: 50 | with job.JobContext(): 51 | transfer_uuid = job.args[1] 52 | target_path = job.args[2] 53 | job.set_status(main(job, transfer_uuid, target_path)) 54 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/nested_file_structmap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /tests/client/test_client.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from a3m.client.mcp import handle_batch_task 4 | 5 | 6 | @pytest.mark.django_db 7 | def test_handle_batch_task_replaces_non_ascii_arguments(mocker): 8 | # We are only interested in 
verifying the string replacement logic 9 | # for task arguments and mock the remaining functionality 10 | mocker.patch("a3m.client.mcp.Job") 11 | mocker.patch("a3m.client.mcp.Task") 12 | mocker.patch("a3m.client.mcp.retryOnFailure") 13 | 14 | # This is the only function that uses the arguments after the replacements 15 | _parse_command_line = mocker.patch("a3m.client.mcp._parse_command_line") 16 | 17 | # The mocked module will not have a `concurrent_instances` attribute 18 | mocker.patch( 19 | "importlib.import_module", return_value=mocker.MagicMock(spec=["call"]) 20 | ) 21 | 22 | # Mock the two parameters sent to handle_batch_task 23 | task_name = "tásk".encode() 24 | batch_payload = { 25 | "tasks": { 26 | "some_task_uuid": { 27 | "uuid": "some_task_uuid", 28 | "arguments": "montréal %taskUUID% %jobCreatedDate%", 29 | "createdDate": "some montréal datetime", 30 | "wants_output": False, 31 | "execute": "command", 32 | } 33 | } 34 | } 35 | handle_batch_task(task_name, batch_payload) 36 | 37 | # Check that string replacement were successful 38 | _parse_command_line.assert_called_once_with( 39 | "montréal some_task_uuid some montréal datetime" 40 | ) 41 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/check_for_submission_documentation.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 17 | import os 18 | import sys 19 | 20 | 21 | def call(jobs): 22 | for job in jobs: 23 | with job.JobContext(): 24 | target = job.args[1] 25 | if not os.path.isdir(target): 26 | job.pyprint("Directory doesn't exist: ", target, file=sys.stderr) 27 | os.mkdir(target) 28 | if os.listdir(target) == []: 29 | job.pyprint("Directory is empty: ", target, file=sys.stderr) 30 | fileName = os.path.join(target, "submissionDocumentation.log") 31 | f = open(fileName, "a") 32 | f.write("No submission documentation added") 33 | f.close() 34 | os.chmod(fileName, 0o600) 35 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/manual_normalization_check_for_manual_normalization_directory.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 
17 | import os 18 | 19 | 20 | def call(jobs): 21 | for job in jobs: 22 | with job.JobContext(): 23 | sip_dir = job.args[1] 24 | manualNormalizationPath = os.path.join( 25 | sip_dir, "objects", "manualNormalization" 26 | ) 27 | job.pyprint("Manual normalization path:", manualNormalizationPath) 28 | if os.path.isdir(manualNormalizationPath): 29 | mn_preserve_path = os.path.join(manualNormalizationPath, "preservation") 30 | if os.path.isdir(mn_preserve_path) and os.listdir(mn_preserve_path): 31 | job.pyprint("Manually normalized files found") 32 | job.set_status(179) 33 | continue 34 | 35 | job.set_status(0) 36 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/has_packages.py: -------------------------------------------------------------------------------- 1 | from a3m.fpr.registry import FPR 2 | from a3m.fpr.registry import RulePurpose 3 | from a3m.main.models import Event 4 | from a3m.main.models import File 5 | from a3m.main.models import Transfer 6 | 7 | 8 | def is_extractable(f) -> bool: 9 | """Determine whether the package is extractable.""" 10 | return len(FPR.get_file_rules(f, purpose=RulePurpose.EXTRACT)) > 0 11 | 12 | 13 | def already_extracted(f) -> bool: 14 | """Determine whether the package has already been extracted.""" 15 | # Look for files in a directory that starts with the package name 16 | files = File.objects.filter( 17 | transfer=f.transfer, 18 | currentlocation__startswith=f.currentlocation, 19 | removedtime__isnull=True, 20 | ).exclude(uuid=f.uuid) 21 | # Check for unpacking events that reference the package 22 | if Event.objects.filter( 23 | file_uuid__in=files, 24 | event_type="unpacking", 25 | event_detail__contains=f.currentlocation, 26 | ).exists(): 27 | return True 28 | return False 29 | 30 | 31 | def main(job, sip_uuid): 32 | transfer = Transfer.objects.get(uuid=sip_uuid) 33 | for f in transfer.file_set.filter(removedtime__isnull=True).iterator(): 34 | if is_extractable(f) and not 
already_extracted(f): 35 | job.pyprint( 36 | f.currentlocation, 37 | "is extractable and has not yet been extracted.", 38 | ) 39 | return 0 40 | job.pyprint("No extractable files found.") 41 | return 1 42 | 43 | 44 | def call(jobs): 45 | for job in jobs: 46 | with job.JobContext(): 47 | job.set_status(main(job, job.args[1])) 48 | -------------------------------------------------------------------------------- /docs/settings.rst: -------------------------------------------------------------------------------- 1 | Settings 2 | ======== 3 | 4 | Users can provide service settings via the ``/etc/a3m/a3m.conf`` configuration 5 | file, e.g.:: 6 | 7 | [a3m] 8 | debug = False 9 | 10 | Environment strings are also supported and they are evaluated last, e.g.:: 11 | 12 | env A3M_DEBUG=yes a3m ... 13 | 14 | Configuration settings are not properly described yet, but here's the list: 15 | 16 | * ``debug`` (boolean) 17 | * ``batch_size`` (int) 18 | * ``concurrent_packages`` (int) 19 | * ``rpc_threads`` (int) 20 | * ``worker_threads`` (int) 21 | * ``shared_directory`` (string) 22 | * ``temp_directory`` (string) 23 | * ``processing_directory`` (string) 24 | * ``rejected_directory`` (string) 25 | * ``capture_client_script_output`` (boolean) 26 | * ``removable_files`` (string) 27 | * ``secret_key`` (string) 28 | * ``prometheus_bind_address`` (string) 29 | * ``prometheus_bind_port`` (string) 30 | * ``time_zone`` (string) 31 | * ``db_engine`` (string) 32 | * ``db_name`` (string) 33 | * ``db_user`` (string) 34 | * ``db_password`` (string) 35 | * ``db_host`` (string) 36 | * ``db_port`` (string) 37 | * ``rpc_bind_address`` (string) 38 | * ``s3_enabled`` (boolean) 39 | * ``s3_endpoint_url`` (string) 40 | * ``s3_region_name`` (string) 41 | * ``s3_access_key_id`` (string) 42 | * ``s3_secret_access_key`` (string) 43 | * ``s3_use_ssl`` (boolean) 44 | * ``s3_addressing_style`` (string) 45 | * ``s3_signature_version`` (string) 46 | * ``s3_bucket`` (string) 47 | * ``org_id`` (string) 48 | * 
``org_name`` (string) 49 | 50 | For greater flexibility, it is also possible to alter the application settings 51 | module manually. This is what our :mod:`a3m.settings.common` module looks like: 52 | 53 | .. literalinclude:: ../a3m/settings/common.py 54 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/load_dublin_core.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | from django.db import transaction 6 | 7 | from a3m.main import models 8 | 9 | # This is the UUID of SIP from the `MetadataAppliesToTypes` table 10 | INGEST_METADATA_TYPE = "3e48343d-e2d2-4956-aaa3-b54d26eb9761" 11 | 12 | 13 | def main(job, sip_uuid, dc_path): 14 | # If there's no metadata, that's not an error, and just keep going 15 | if not os.path.exists(dc_path): 16 | job.pyprint("DC metadata not found; exiting", "(at", dc_path + ")") 17 | return 0 18 | 19 | job.pyprint("Loading DC metadata from", dc_path) 20 | with open(dc_path) as json_data: 21 | data = json.load(json_data) 22 | dc = models.DublinCore( 23 | metadataappliestoidentifier=sip_uuid, 24 | metadataappliestotype_id=INGEST_METADATA_TYPE, 25 | ) 26 | for key, value in data.items(): 27 | try: 28 | setattr(dc, key, value) 29 | except AttributeError: 30 | job.pyprint("Invalid DC attribute:", key, file=sys.stderr) 31 | 32 | dc.save() 33 | 34 | # ``dc.json`` was copied to ingest so the code above could read it, but we 35 | # don't need it anymore so we're removing it.
36 | try: 37 | job.pyprint('Removing "dc.json":', dc_path) 38 | os.remove(dc_path) 39 | except Exception as err: 40 | job.pyprint('Unable to remove "dc.json":', err) 41 | 42 | return 0 43 | 44 | 45 | def call(jobs): 46 | with transaction.atomic(): 47 | for job in jobs: 48 | with job.JobContext(): 49 | sip_uuid = job.args[1] 50 | dc_path = job.args[2] 51 | job.set_status(main(job, sip_uuid, dc_path)) 52 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/verify_mets.py: -------------------------------------------------------------------------------- 1 | """verify_mets.py 2 | 3 | Verify METS documents provided to the script. Its first, and primary use so 4 | far is to verify the validity of custom structmaps included with transfers and 5 | supplied on ingest after appraisal. 6 | """ 7 | 8 | import os 9 | 10 | from lxml import etree 11 | 12 | from a3m.archivematicaFunctions import strToUnicode 13 | 14 | 15 | class VerifyMETSException(Exception): 16 | """Exception to raise if METS validation fails.""" 17 | 18 | 19 | def call(jobs): 20 | """Primary entry point for this script.""" 21 | for job in jobs: 22 | with job.JobContext(): 23 | mets_structmap = os.path.join( 24 | strToUnicode(job.args[1]), "metadata", "mets_structmap.xml" 25 | ) 26 | mets_xsd = job.args[2] 27 | if not os.path.isfile(mets_structmap): 28 | job.pyprint("Custom structmap not supplied with package") 29 | return 30 | if not os.path.isfile(mets_xsd): 31 | raise VerifyMETSException 32 | xmlschema = etree.XMLSchema( 33 | etree.parse( # noqa S320 34 | mets_xsd, etree.XMLParser(resolve_entities=False, no_network=True) 35 | ) 36 | ) 37 | # Raise an exception if not valid, e.g. etree.DocumentInvalid 38 | # otherwise, the document validates correctly and returns. 
39 | xmlschema.assertValid( 40 | etree.parse( # noqa S320 41 | mets_structmap, 42 | etree.XMLParser(resolve_entities=False, no_network=True), 43 | ) 44 | ) 45 | job.pyprint("Custom structmap validated correctly") 46 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: read 11 | id-token: write 12 | packages: write 13 | 14 | jobs: 15 | pypi: 16 | runs-on: ubuntu-latest 17 | environment: release 18 | steps: 19 | - name: "Check out source code" 20 | uses: "actions/checkout@v4" 21 | - name: Install the latest version of uv 22 | uses: astral-sh/setup-uv@v2 23 | with: 24 | enable-cache: true 25 | version: latest 26 | - name: Build package 27 | run: uv build 28 | - name: "Upload distribution packages to PyPI" 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | docker: 31 | runs-on: ubuntu-latest 32 | environment: release 33 | steps: 34 | - name: "Check out source code" 35 | uses: "actions/checkout@v4" 36 | with: 37 | fetch-depth: 0 38 | - name: Capture Docker metadata 39 | id: meta 40 | uses: docker/metadata-action@v5 41 | with: 42 | images: | 43 | ghcr.io/artefactual-labs/a3m 44 | tags: | 45 | type=semver,pattern={{raw}} 46 | - name: Set up Docker Buildx 47 | uses: docker/setup-buildx-action@v3 48 | - name: Log in to the container registry 49 | uses: docker/login-action@v3 50 | with: 51 | registry: ghcr.io 52 | username: ${{ github.repository_owner }} 53 | password: ${{ secrets.GITHUB_TOKEN }} 54 | - name: Build and push Docker image 55 | uses: docker/build-push-action@v5 56 | with: 57 | push: true 58 | tags: ${{ steps.meta.outputs.tags }} 59 | labels: ${{ steps.meta.outputs.labels }} 60 | build-args: BUILDKIT_CONTEXT_KEEP_GIT_DIR=true 61 | 
-------------------------------------------------------------------------------- /a3m/client/clientScripts/remove_unneeded_files.py: -------------------------------------------------------------------------------- 1 | """Attempts to remove a file if its name matches a list of filenames that 2 | should be removed. If it does, and if the removal was successful, then it 3 | updates the ``File`` model of the file accordingly and also creates a "file 4 | removed" event in the database. Command line required arguments are the path to 5 | the file and its UUID. There is a default list of file names that are deleted; 6 | however, this can be overridden in MCPClient/clientConfig.conf s 7 | """ 8 | 9 | import os 10 | import shutil 11 | 12 | from django.conf import settings as mcpclient_settings 13 | from django.db import transaction 14 | 15 | from a3m.databaseFunctions import fileWasRemoved 16 | 17 | # databaseFunctions requires Django to be set up 18 | 19 | 20 | def remove_file(job, target_file, file_uuid): 21 | removableFiles = [e.strip() for e in mcpclient_settings.REMOVABLE_FILES.split(",")] 22 | basename = os.path.basename(target_file) 23 | if basename in removableFiles: 24 | job.print_output( 25 | "Removing {filename} (UUID: {uuid})".format( 26 | uuid=file_uuid, filename=basename 27 | ) 28 | ) 29 | try: 30 | os.remove(target_file) 31 | except OSError: 32 | shutil.rmtree(target_file) 33 | # Gearman passes parameters as strings, so None (NoneType) becomes 34 | # "None" (string) 35 | if file_uuid and file_uuid != "None": 36 | fileWasRemoved(file_uuid) 37 | 38 | 39 | def call(jobs): 40 | with transaction.atomic(): 41 | for job in jobs: 42 | with job.JobContext(): 43 | target = job.args[1] 44 | file_uuid = job.args[2] 45 | 46 | job.set_status(remove_file(job, target, file_uuid)) 47 | -------------------------------------------------------------------------------- /a3m/cli/server/__main__.py: -------------------------------------------------------------------------------- 1 
| import logging 2 | import os 3 | import platform 4 | import signal 5 | 6 | import grpc 7 | from django.conf import settings 8 | 9 | from a3m import __version__ 10 | from a3m.cli.common import configure_xml_catalog_files 11 | from a3m.cli.common import init_django 12 | from a3m.cli.common import suppress_warnings 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def main(): 18 | init_django() 19 | suppress_warnings() 20 | configure_xml_catalog_files() 21 | 22 | from a3m.server.runner import create_server 23 | 24 | logger.info( 25 | f"Starting a3m... (version={__version__} pid={os.getpid()} " 26 | f"uid={os.getuid()} python={platform.python_version()} " 27 | f"listen={settings.RPC_BIND_ADDRESS})" 28 | ) 29 | 30 | # A3M-TODO: make this configurable, e.g. local tcp, local uds, tls certs... 31 | # (see https://grpc.github.io/grpc/python/grpc.html#create-server-credentials for more) 32 | server_credentials = grpc.local_server_credentials( 33 | grpc.LocalConnectionType.LOCAL_TCP 34 | ) 35 | 36 | server = create_server( 37 | settings.RPC_BIND_ADDRESS, 38 | server_credentials, 39 | settings.CONCURRENT_PACKAGES, 40 | settings.BATCH_SIZE, 41 | settings.WORKER_THREADS, 42 | settings.RPC_THREADS, 43 | settings.DEBUG, 44 | ) 45 | server.start() 46 | 47 | def signal_handler(signo, frame): 48 | logger.info("Received termination signal (%s)", signal.Signals(signo).name) 49 | server.stop() 50 | 51 | signal.signal(signal.SIGINT, signal_handler) 52 | signal.signal(signal.SIGTERM, signal_handler) 53 | 54 | server.wait_for_termination() 55 | 56 | logger.info("a3m shutdown complete.") 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/remove_hidden_files_and_directories.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 
def removeHiddenFilesFromDirectory(job, dir):
    """Recursively delete hidden entries and editor backups below ``dir``.

    Directories whose name starts with ``.`` are removed with their contents;
    other directories are descended into. Regular files are removed when their
    name starts with ``.`` or ends with ``~``. Entries that are neither a
    directory nor a regular file are reported on stderr and left in place.
    Every removal is announced through ``job.pyprint``.
    """
    for entry in os.listdir(dir):
        path = os.path.join(dir, entry)
        dotted = entry.startswith(".")

        if os.path.isdir(path):
            if not dotted:
                removeHiddenFilesFromDirectory(job, path)
                continue
            job.pyprint("Removing directory: ", path)
            shutil.rmtree(path)
            continue

        if not os.path.isfile(path):
            job.pyprint("Not file or directory: ", path, file=sys.stderr)
            continue

        if dotted or entry.endswith("~"):
            job.pyprint("Removing file: ", path)
            os.remove(path)


def call(jobs):
    """Clean the transfer directory given in ``args[1]`` of every job."""
    for job in jobs:
        with job.JobContext():
            removeHiddenFilesFromDirectory(job, job.args[1])
17 | enable-cache: true 18 | version: latest 19 | - name: Run tests 20 | run: ./test.sh 21 | - name: "Upload coverage report" 22 | if: github.repository == 'artefactual-labs/a3m' 23 | uses: "codecov/codecov-action@v4" 24 | with: 25 | files: ./coverage.xml 26 | token: ${{ secrets.CODECOV_TOKEN }} 27 | - name: Upload test results to Codecov 28 | if: ${{ !cancelled() }} 29 | uses: codecov/test-results-action@v1 30 | with: 31 | files: ./junit.xml 32 | token: ${{ secrets.CODECOV_TOKEN }} 33 | e2e: 34 | name: "Run E2E tests" 35 | runs-on: ubuntu-latest 36 | steps: 37 | - name: Checkout 38 | uses: actions/checkout@v4 39 | - name: Set up Docker Buildx 40 | uses: docker/setup-buildx-action@v3 41 | - name: Build and cache 42 | uses: docker/build-push-action@v5 43 | with: 44 | context: . 45 | tags: a3m:latest 46 | push: false 47 | load: true 48 | cache-from: type=gha 49 | cache-to: type=gha,mode=max 50 | - name: Run test 51 | run: | 52 | docker image ls 53 | docker run \ 54 | --rm \ 55 | --entrypoint=python \ 56 | --env=A3M_DEBUG=yes \ 57 | a3m:latest \ 58 | -m a3m.cli.client \ 59 | --name=MARBLES \ 60 | https://github.com/artefactual/archivematica-sampledata/raw/master/SampleTransfers/Images/pictures/MARBLES.TGA 61 | -------------------------------------------------------------------------------- /tests/common/fixtures/agents.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "fields": { 4 | "agenttype": "software", 5 | "identifiervalue": "Archivematica-1.3.2", 6 | "name": "Archivematica", 7 | "identifiertype": "preservation system" 8 | }, 9 | "model": "main.agent", 10 | "pk": 1 11 | }, 12 | { 13 | "fields": { 14 | "agenttype": "organization", 15 | "identifiervalue": "demo", 16 | "name": "demo", 17 | "identifiertype": "repository code" 18 | }, 19 | "model": "main.agent", 20 | "pk": 2 21 | }, 22 | { 23 | "fields": { 24 | "agenttype": "Archivematica user", 25 | "identifiervalue": "1", 26 | "name": "username=\"kmindelan\", 
first_name=\"Keladry\", last_name=\"Mindelan\"", 27 | "identifiertype": "Archivematica user pk" 28 | }, 29 | "model": "main.agent", 30 | "pk": 3 31 | }, 32 | { 33 | "fields": { 34 | "agenttype": "Archivematica user", 35 | "identifiervalue": "2", 36 | "name": "SIP Agent", 37 | "identifiertype": "Archivematica user pk" 38 | }, 39 | "model": "main.agent", 40 | "pk": 5 41 | }, 42 | { 43 | "fields": { 44 | "agenttype": "Archivematica user", 45 | "identifiervalue": "2", 46 | "name": "Transfer Agent", 47 | "identifiertype": "Archivematica user pk" 48 | }, 49 | "model": "main.agent", 50 | "pk": 10 51 | }, 52 | { 53 | "fields": { 54 | "agenttype": "software", 55 | "identifiervalue": "Other-Software-1.0", 56 | "name": "Other Software", 57 | "identifiertype": "preservation system" 58 | }, 59 | "model": "main.agent", 60 | "pk": 11 61 | }, 62 | { 63 | "fields": { 64 | "agenttype": "organization", 65 | "identifiervalue": "other-repository", 66 | "name": "Other repository", 67 | "identifiertype": "repository code" 68 | }, 69 | "model": "main.agent", 70 | "pk": 12 71 | } 72 | ] 73 | -------------------------------------------------------------------------------- /tests/common/test_execute_functions.py: -------------------------------------------------------------------------------- 1 | import shlex 2 | 3 | import a3m.executeOrRunSubProcess as execsub 4 | 5 | 6 | def test_capture_output(): 7 | """Tests behaviour of capture_output when executing sub processes.""" 8 | 9 | # Test that stdout and stderr are not captured by default 10 | ret, std_out, std_err = execsub.launchSubProcess(["ls", "/tmp"]) 11 | assert std_out == "" 12 | assert std_err == "" 13 | 14 | # Test that stdout and stderr are captured when `capture_output` is 15 | # enabled. 16 | ret, std_out, std_err = execsub.launchSubProcess( 17 | ["ls", "/tmp"], capture_output=True 18 | ) 19 | assert std_out != "" or std_err != "" 20 | 21 | # Test that stdout and stderr are not captured when `capture_output` is 22 | # not enabled. 
class ClientWrapper(ContextDecorator):
    """Context manager exposing an a3m client, with an embedded server on demand.

    Pass ``address`` to talk to an existing a3m server. When ``address`` is
    left undefined, the wrapper creates an embedded server on a dynamically
    assigned local port and points the client at it. The embedded server is
    started when the context is entered and stopped when it is left.
    """

    BIND_LOCAL_ADDRESS = "localhost:0"

    def __init__(self, address=None, wait_for_ready=False):
        self.address = address
        self.wait_for_ready = wait_for_ready

        # The server step may fill in ``self.address``, which the client
        # step then uses to open its channel.
        self._create_server()
        self._create_client()

    def __enter__(self):
        embedded = self.server
        if embedded is not None:
            embedded.start()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        embedded = self.server
        if embedded is not None:
            embedded.stop()
        # Never swallow an exception raised inside the ``with`` body.
        return False if exc_type else None

    def _create_server(self):
        """Create the embedded server unless an external address was given."""
        self.server = None

        if self.address is None:
            from a3m.server.runner import create_server

            credentials = grpc.local_server_credentials(
                grpc.LocalConnectionType.LOCAL_TCP
            )
            self.server = create_server(
                self.BIND_LOCAL_ADDRESS,
                credentials,
                settings.CONCURRENT_PACKAGES,
                settings.BATCH_SIZE,
                settings.WORKER_THREADS,
                settings.RPC_THREADS,
                settings.DEBUG,
            )

            # The port was assigned dynamically, so derive the real address.
            self.address = f"localhost:{self.server.grpc_port}"

    def _create_client(self):
        """Open a channel to ``self.address`` and wrap it in a ``Client``."""
        self.client = Client(
            grpc.insecure_channel(self.address),
            wait_for_ready=self.wait_for_ready,
        )
format_version.pronom_id == "fmt/938" 61 | 62 | Event.objects.get( 63 | file_uuid=file_obj, 64 | event_type="format identification", 65 | event_outcome="Positive", 66 | event_outcome_detail="fmt/938", 67 | ) 68 | FileID.objects.get( 69 | file_id=file_obj.uuid, 70 | format_name="Python Script File", 71 | format_registry_key="fmt/938", 72 | ) 73 | -------------------------------------------------------------------------------- /examples/webapp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """HTTP gateway for a3m. 3 | 4 | When executed, this module starts a simple HTTP server that submits transfers 5 | to an embedded a3m server instance on every GET request received. 6 | 7 | Usage:: 8 | 9 | $ pip install a3m 10 | $ ./webapp.py 11 | $ curl 127.0.0.1:33892 12 | 13 | """ 14 | 15 | from http.server import BaseHTTPRequestHandler 16 | from http.server import ThreadingHTTPServer 17 | from threading import Thread 18 | 19 | import grpc 20 | 21 | from a3m.cli.common import init_django 22 | 23 | init_django() # This will not be needed in the future. 24 | from a3m.server.rpc.client import Client 25 | from a3m.server.runner import create_server 26 | 27 | 28 | class RequestHandler(BaseHTTPRequestHandler): 29 | def __init__(self, *args): 30 | super().__init__(*args) 31 | 32 | def do_GET(self): 33 | self.send_response(200) 34 | self.send_header("Content-Type", "text/html") 35 | self.end_headers() 36 | try: 37 | resp = a3mc.submit( 38 | name="Test", 39 | url="https://github.com/artefactual/archivematica-sampledata/raw/master/SampleTransfers/ZippedDirectoryTransfers/DemoTransferCSV.zip", 40 | ) 41 | except Exception as err: 42 | self.wfile.write(f"Error: {err}".encode()) 43 | else: 44 | self.wfile.write(f"Transfer submitted! 
def _upload_file(path, bucket, key):
    """Upload the file at ``path`` to ``bucket`` under ``key`` using boto3.

    Connection parameters are taken from the ``S3_*`` settings; only settings
    with a truthy value are forwarded to boto3.
    """
    boto_args = {"service_name": "s3"}
    if settings.S3_ENDPOINT_URL:
        boto_args.update(endpoint_url=settings.S3_ENDPOINT_URL)
    if settings.S3_REGION_NAME:
        boto_args.update(region_name=settings.S3_REGION_NAME)
    # Credentials are only passed explicitly when both halves are configured.
    if settings.S3_ACCESS_KEY_ID and settings.S3_SECRET_ACCESS_KEY:
        boto_args.update(
            aws_access_key_id=settings.S3_ACCESS_KEY_ID,
            aws_secret_access_key=settings.S3_SECRET_ACCESS_KEY,
        )
    # NOTE(review): a falsy S3_USE_SSL is never forwarded, so boto3's own
    # default applies in that case -- confirm this is the intended behaviour.
    if settings.S3_USE_SSL:
        boto_args.update(use_ssl=settings.S3_USE_SSL)

    s3_config = {}
    if settings.S3_ADDRESSING_STYLE:
        s3_config.update(addressing_style=settings.S3_ADDRESSING_STYLE)
    if settings.S3_SIGNATURE_VERSION:
        s3_config.update(signature_version=settings.S3_SIGNATURE_VERSION)
    if s3_config:
        config = Config(s3=s3_config)
        boto_args.update(config=config)

    s3 = boto3.resource(**boto_args)
    s3.meta.client.upload_file(path, bucket, key)


def _store_aip(job, sip_id, aip_path):
    """Record the AIP as stored and, when S3 is enabled, upload it.

    ``aip_path`` may be a string or a ``pathlib.Path``. The AIP must be a
    single (compressed) file.

    Raises:
        Exception: when ``aip_path`` is a directory.
    """
    from pathlib import Path

    # Job arguments are plain strings; coerce so that ``is_dir()`` below
    # works regardless of whether the caller passed a str or a Path.
    aip_path = Path(aip_path)

    metrics.aip_stored(sip_id, size=0)  # A3M-TODO: write size.

    if not settings.S3_ENABLED:
        return

    # We're assuming that we don't have a directory!
    if aip_path.is_dir():
        job.pyprint("AIP must be compressed", file=sys.stderr)
        raise Exception("AIP is a directory")

    logger.info("Uploading AIP...")
    _upload_file(str(aip_path), settings.S3_BUCKET, sip_id)


def call(jobs):
    """Store the AIP described by the first job.

    Only ``jobs[0]`` is processed; it supplies the SIP identifier in
    ``args[1]`` and the AIP path in ``args[2]``.
    """
    job = jobs[0]
    with transaction.atomic():
        with job.JobContext():
            sip_id = job.args[1]
            aip_path = job.args[2]
            job.set_status(_store_aip(job, sip_id, aip_path))
def updateDB(dst, transferUUID):
    """Store ``dst`` as the current location of the given transfer."""
    matching = Transfer.objects.filter(uuid=transferUUID)
    matching.update(currentlocation=dst)


def moveSIP(job, src, dst, transferUUID, sharedDirectoryPath):
    """Move a transfer from ``src`` to ``dst`` and record its new location.

    The recorded location is ``dst`` with the first occurrence of
    ``sharedDirectoryPath`` replaced by the ``%sharedPath%`` placeholder,
    completed with the source's basename when ``dst`` names a directory, and
    suffixed with ``/`` when the source is a directory.

    Returns whatever ``rename`` returns.
    """
    source = src[:-1] if src.endswith("/") else src
    leaf = os.path.basename(source)

    recorded = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if recorded.endswith("/"):
        recorded = os.path.join(recorded, leaf)
    if recorded.endswith("/."):
        recorded = os.path.join(recorded[:-1], leaf)
    if os.path.isdir(source):
        recorded = recorded + "/"

    updateDB(recorded, transferUUID)
    return rename(source, dst, printfn=job.pyprint, should_exit=False)


def call(jobs):
    """Move every job's transfer inside a single database transaction.

    Job arguments, in order from ``args[1]``: source, destination, transfer
    UUID and shared directory path.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                src, dst, transferUUID, sharedDirectoryPath = job.args[1:5]
                job.set_status(
                    moveSIP(job, src, dst, transferUUID, sharedDirectoryPath)
                )
def verifyFileUUID(job, fileUUID, filePath, sipDirectory):
    """Delete ``filePath`` when it has no database record or is marked deleted.

    A ``fileUUID`` equal to the string ``"None"`` means the file is unknown to
    the database, and it is removed unconditionally. Otherwise the ``File``
    row is looked up and the file is removed only when its ``filegrpuse`` is
    ``"deleted"`` and the path still exists. Each removal is announced via
    ``job.print_output`` using a path relative to ``%SIPDirectory%``.
    """

    def _discard(reason):
        # Report the path with the SIP directory replaced by its placeholder.
        shown = filePath.replace(sipDirectory, "%SIPDirectory%", 1)
        job.print_output("Deleting", shown, reason)
        os.remove(filePath)

    if fileUUID == "None":
        _discard("because it is not in the database.")
        return

    record = File.objects.get(uuid=fileUUID)
    if record.filegrpuse != "deleted":
        return
    if os.path.exists(filePath):
        _discard("because it is marked as deleted")


def call(jobs):
    """Parse each job's command-line style arguments and verify its file."""
    parser = argparse.ArgumentParser()
    for short, long_name, fallback in (
        ("-f", "--inputFile", ""),
        ("-o", "--sipDirectory", ""),
        ("-i", "--fileUUID", "None"),
    ):
        parser.add_argument(short, long_name, default=fallback)

    for job in jobs:
        with job.JobContext():
            opts = parser.parse_args(job.args[1:])
            verifyFileUUID(job, opts.fileUUID, opts.inputFile, opts.sipDirectory)
This image is public and can be pulled 11 | from the command line with:: 12 | 13 | docker pull ghcr.io/artefactual-labs/a3m:latest 14 | 15 | If you don't want to use Docker, it is still possible to run a3m successfully 16 | as long as the software dependencies are provided in some other way. Please 17 | continue reading. 18 | 19 | Dependencies 20 | ============ 21 | 22 | We don't have a comprehensive list of software dependencies yet or mechanisms 23 | to manage them dynamically. For the time being, here are some examples valid 24 | for an Ubuntu/Debian Linux environment: 25 | 26 | `Unar `_:: 27 | 28 | sudo apt-get install unar 29 | 30 | `FFmpeg (ffprobe) `_:: 31 | 32 | sudo apt-get install ffmpeg 33 | 34 | `ExifTool `_:: 35 | 36 | wget https://packages.archivematica.org/1.15.x/ubuntu-externals/pool/main/libi/libimage-exiftool-perl/libimage-exiftool-perl_10.10-2~14.04_all.deb 37 | sudo dpkg -i libimage-exiftool-perl_10.10-2~14.04_all.deb 38 | 39 | `MediaInfo `_:: 40 | 41 | sudo apt-get install mediainfo 42 | 43 | `Sleuth Kit (fiwalk) `_:: 44 | 45 | sudo apt-get install sleuthkit 46 | 47 | `JHOVE `_:: 48 | 49 | sudo apt-get install ca-certificates-java java-common openjdk-8-jre-headless 50 | wget https://packages.archivematica.org/1.15.x/ubuntu-externals/pool/main/j/jhove/jhove_1.20.1-6~18.04_all.deb 51 | sudo dpkg -i jhove_1.20.1-6~18.04_all.deb 52 | 53 | `7-Zip `_:: 54 | 55 | sudo apt-get install p7zip-full 56 | 57 | `atool `_:: 58 | 59 | sudo apt-get install atool 60 | 61 | `test `_:: 62 | 63 | sudo apt-get install coreutils 64 | -------------------------------------------------------------------------------- /a3m/common_metrics.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | from contextlib import contextmanager 4 | 5 | from prometheus_client import Counter 6 | 7 | # We need to balance reasonably accurate tracking with high cardinality here, as 8 | # this is used with script_name labels and there
@contextmanager
def db_retry_timer(*args, **kwargs):
    """Add the time spent inside the ``with`` body to the retry counter.

    Keyword arguments are forwarded to ``db_retry_time_counter.labels`` and
    select the labelled counter to increment. Positional arguments are
    accepted but not used. The elapsed time is recorded even when the body
    raises.
    """
    # Use a monotonic clock: a wall-clock adjustment during the body could
    # otherwise yield a negative duration, which a counter cannot accept.
    started = time.monotonic()
    try:
        yield
    finally:
        duration = time.monotonic() - started
        db_retry_time_counter.labels(**kwargs).inc(duration)
def call(jobs):
    """Apply file labels from a CSV file to the ``File`` rows of a transfer.

    Each job supplies the transfer UUID in ``args[1]`` and the path of the
    CSV file in ``args[2]``. Every CSV row is expected to hold a file path
    (relative to the transfer's ``objects/`` directory) followed by its label.
    Rows with fewer than two columns are skipped instead of aborting the
    whole batch. A job whose CSV file does not exist is reported and finishes
    with status 0.
    """
    # Column layout of the CSV file: path first, label second.
    path_column, label_column = 0, 1

    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                transferUUID = job.args[1]
                fileLabels = job.args[2]

                if not os.path.isfile(fileLabels):
                    job.pyprint("No such file:", fileLabels)
                    job.set_status(0)
                    continue

                # The csv module does its own newline handling and expects
                # the file to be opened with newline="".
                with open(fileLabels, newline="") as f:
                    for row in csv.reader(f):
                        if len(row) < 2:
                            # Blank lines come back as empty rows; report
                            # only rows that carry some data.
                            if row:
                                job.pyprint("Skipping malformed row:", row)
                            continue

                        label = row[label_column]
                        filePath = os.path.join(
                            "%transferDirectory%objects/", row[path_column]
                        )
                        File.objects.filter(
                            originallocation=filePath, transfer_id=transferUUID
                        ).update(label=label)
def fetch_set(sip_uuid):
    """Return the ``transfermetadatasetrow`` of the transfer with this UUID."""
    return Transfer.objects.get(uuid=sip_uuid).transfermetadatasetrow


def fetch_fields_and_values(sip_uuid):
    """Return ``(field name, value)`` pairs for the transfer's metadata set.

    Entries whose value is the empty string are left out. When the transfer
    has no metadata set, an empty list is returned.
    """
    metadata_set = fetch_set(sip_uuid)
    if metadata_set is None:
        return []

    non_empty = metadata_set.transfermetadatafieldvalue_set.exclude(fieldvalue="")
    return non_empty.values_list("field__fieldname", "fieldvalue")


def build_element(label, value, root):
    """Append a child named ``label`` with text ``value`` to ``root``.

    Returns the newly created element.
    """
    child = etree.SubElement(root, label)
    child.text = value
    return child
63 | metrics.transfer_completed(opts.sip_uuid) 64 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This repository is the place to file a3m bug reports as well as make suggestions 4 | for new or enhanced features. Anyone with a GitHub account can add an issue, 5 | comment on someone else's issue, or make a pull request. 6 | 7 | ## Security 8 | 9 | If you have a security concern about a3m or any of its companion repositories, 10 | please do not file it here. See the [security policy](SECURITY.md) in this 11 | repository for directions on how to report security issues. 12 | 13 | ## Filing an issue 14 | 15 | All changes to a3m should start with an issue, including bug fixes, new 16 | features, and enhancements to existing features. 17 | 18 | To file an issue, go to [the Issues 19 | tab](https://github.com/artefactual-labs/a3m/issues) and click the green **New 20 | issue** button in the top right-hand corner. You can select the appropriate 21 | template from the list. Fill out the template with as much information as you 22 | can. 23 | 24 | An issue should describe a behaviour without implying a solution. The pull 25 | request that may follow, if changes to the codebase are necessary, fixes the 26 | problem. Framing your issue as a problem statement helps everyone understand why 27 | the issue is important - it describes how a3m is not performing as it should 28 | (bug) or as it could (enhancement). Please title your issue as a problem 29 | statement, starting with "Problem:". You can check [existing 30 | issues](https://github.com/archivematica/Issues/issues) for examples. 31 | 32 | ### Reporting a bug 33 | 34 | To report a bug, select **Bug report** from the issue templates and fill out the 35 | fields with as much information as possible.
36 | 37 | Useful information to provide includes: 38 | 39 | * What version of a3m are you using? 40 | * Are you using it with Enduro? 41 | * How was it installed? 42 | * Was this a fresh install or an upgrade? 43 | * What did you do to cause this bug to happen? 44 | * What did you expect to happen? 45 | * What did you see instead? 46 | * Can you reproduce this reliably? 47 | 48 | ### Submitting an enhancement idea 49 | 50 | To suggest a new feature or an enhancement to an existing feature, select 51 | **Feature request** from the issue templates and fill out the fields with as 52 | much information as possible. 53 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/move_sip.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 
def updateDB(dst, sip_uuid):
    """Store ``dst`` as the current path of the given SIP."""
    matching = SIP.objects.filter(uuid=sip_uuid)
    matching.update(currentpath=dst)


def moveSIP(job, src, dst, sipUUID, sharedDirectoryPath):
    """Move a SIP from ``src`` to ``dst`` and record its new path.

    The recorded path is ``dst`` with the first occurrence of
    ``sharedDirectoryPath`` replaced by the ``%sharedPath%`` placeholder,
    completed with the source's basename when ``dst`` names a directory, and
    always suffixed with ``/``. Anything already present at the target
    location is deleted (with a warning on stderr) before the move.

    Returns whatever ``rename`` returns.
    """
    source = src[:-1] if src.endswith("/") else src
    leaf = os.path.basename(source)

    recorded = dst.replace(sharedDirectoryPath, "%sharedPath%", 1)
    if recorded.endswith("/"):
        recorded = os.path.join(recorded, leaf)
    if recorded.endswith("/."):
        recorded = os.path.join(recorded[:-1], leaf)
    updateDB(recorded + "/", sipUUID)

    # A leftover from an earlier run would get in the way of the move.
    target = os.path.join(dst, leaf)
    if os.path.exists(target):
        job.pyprint(target, "exists, deleting", file=sys.stderr)
        shutil.rmtree(target)

    return rename(source, dst, printfn=job.pyprint, should_exit=False)


def call(jobs):
    """Move every job's SIP inside a single database transaction.

    Job arguments, in order from ``args[1]``: source, destination, SIP UUID
    and shared directory path.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                src, dst, sipUUID, sharedDirectoryPath = job.args[1:5]
                job.set_status(moveSIP(job, src, dst, sipUUID, sharedDirectoryPath))
@pytest.fixture
def compressed_file(db, transfer):
    """Return the File record for a package that was extracted in place.

    Creates a ``compressed.zip`` directory holding ``file.txt`` under the
    transfer location, one File row for each of them, and attaches a format
    version to the package row.
    """
    # Simulate a compressed file being extracted to a directory with the same name.
    d = Path(transfer.currentlocation) / "compressed.zip"
    d.mkdir()

    # Place an extracted file in it.
    f = d / "file.txt"
    f.touch()

    # Create File models for the compressed and extracted files.
    result = File.objects.create(
        uuid=uuid4(), transfer=transfer, originallocation=d, currentlocation=d
    )
    File.objects.create(
        uuid=uuid4(), transfer=transfer, originallocation=f, currentlocation=f
    )

    # Create a file format version for the compressed file.
    # ce097bf8 is a fpr.formatversion for 7zip with an extraction rule.
    result.fileformatversion_set.create(
        format_version_id="ce097bf8-dc4d-4083-932e-82224890f26a"
    )

    return result


def test_main_detects_file_is_extractable_via_fpr(
    db, mocker, transfer, compressed_file
):
    """``main`` returns 0 for a transfer whose package has an extraction rule."""
    job = mocker.Mock(spec=Job)

    result = has_packages.main(job, str(transfer.uuid))

    assert result == 0


def test_main_detects_file_was_already_extracted_from_unpacking_event(
    db, mocker, transfer, compressed_file
):
    """``main`` returns 1 once the extracted file carries an "unpacking" event."""
    job = mocker.Mock(spec=Job)

    # Look up the File row created by the fixture for the extracted file.
    extracted_file = File.objects.get(
        currentlocation__startswith=compressed_file.currentlocation,
        currentlocation__endswith="file.txt",
    )
    # The event detail names the package UUID the file was unpacked from.
    extracted_file.event_set.create(
        event_type="unpacking",
        event_detail=f"Unpacked from: {extracted_file.currentlocation} ({compressed_file.uuid})",
    )

    result = has_packages.main(job, str(transfer.uuid))

    assert result == 1
"""i18n handling."""

import pprint

FALLBACK_LANG = "en"
UNKNOWN_TRANSLATION_LABEL = ""


class TranslationLabel:
    """Wrapper for easy access to translated messages.

    The JSON-encoded workflow uses ``object`` (mapping type) to associate
    messages for a particular property to language codes, e.g.::

        {
            "en": "cat",
            "es": "gato"
        }

    ``json`` decodes it as a ``dict``. This class wraps the dictionary so it is
    easier to access the translations. Any non-dict value is stored as the
    ``FALLBACK_LANG`` message. Usage example::

        >>> message = {"en": "cat", "es": "gato"}
        >>> tr = TranslationLabel(message)
        >>> tr
        TranslationLabel({'en': 'cat', 'es': 'gato'})
        >>> str(tr)
        'cat'
        >>> tr["es"]
        'gato'
        >>> tr["foobar"]
        'cat'
        >>> tr.get_label(lang="es")
        'gato'
        >>> tr.get_label(lang="is", fallback_label="köttur")
        'köttur'

    """

    def __init__(self, translations):
        if not isinstance(translations, dict):
            translations = {FALLBACK_LANG: str(translations)}
        self._src = translations

    def __repr__(self):
        return f"{self.__class__.__name__}({pprint.saferepr(self._src)})"

    def __str__(self):
        return self.get_label()

    def __getitem__(self, lang):
        return self.get_label(lang)

    def _prepare_lang(self, lang):
        """Turn a hyphenated code such as ``pt-br`` into ``pt_BR``.

        Codes without a hyphen are returned unchanged.
        """
        parts = lang.partition("-")
        if parts[1] == "-":
            return f"{parts[0]}_{parts[2].upper()}"
        return lang

    def get_label(self, lang=FALLBACK_LANG, fallback_label=None):
        """Get the translation of a message.

        It defaults to ``FALLBACK_LANG`` unless ``lang`` is used; a ``lang``
        of ``None`` (or empty) is treated as ``FALLBACK_LANG``.
        It accepts a ``fallback_label``, used when the message is not
        available in the language given. As a last resort, it returns
        ``UNKNOWN_TRANSLATION_LABEL``.
        """
        # Tolerate callers forwarding an unset language instead of crashing
        # on ``None.partition``.
        lang = self._prepare_lang(lang or FALLBACK_LANG)
        if lang in self._src:
            return self._src[lang]
        if fallback_label is not None:
            return fallback_label
        return self._src.get(FALLBACK_LANG, UNKNOWN_TRANSLATION_LABEL)
logger = logging.getLogger(__name__)


def _move_file(job, src, dst, exit_on_error=True):
    """Move ``src`` to ``dst`` with ``shutil.move``.

    On ``OSError`` a message is printed through ``job``; the error is
    re-raised unless ``exit_on_error`` is false.
    """
    logger.debug("Moving %s to %s", src, dst)
    try:
        shutil.move(src, dst)
    except OSError:
        job.pyprint("Could not move", src)
        if not exit_on_error:
            return
        raise


def restructure_transfer(job, unit_path):
    """Create the structured directories and sweep loose entries into ``objects``.

    Directories not listed in ``REQUIRED_DIRECTORIES`` and files not listed in
    ``OPTIONAL_FILES`` found at the top level of ``unit_path`` are moved.
    """
    # Create required directories
    create_structured_directory(unit_path)

    # Move everything else to the objects directory
    objects_target = os.path.join(unit_path, "objects", ".")
    for entry in os.listdir(unit_path):
        entry_path = os.path.join(unit_path, entry)
        loose_directory = (
            os.path.isdir(entry_path) and entry not in REQUIRED_DIRECTORIES
        )
        loose_file = os.path.isfile(entry_path) and entry not in OPTIONAL_FILES
        if loose_directory or loose_file:
            _move_file(job, entry_path, objects_target)


def call(jobs):
    """Restructure the unit at ``job.args[1]`` for each job.

    All jobs share one database transaction. An ``OSError`` is printed
    through the job and the job status is set to 1.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext(logger=logger):
                try:
                    restructure_transfer(job, job.args[1])
                except OSError as err:
                    job.pyprint(repr(err))
                    job.set_status(1)
12 | 13 | :func:`a3m.server.runner.create_server` is a function that helps you create 14 | your own instance of :class:`a3m.server.runner.Server`, the gRPC server. 15 | 16 | Use :class:`a3m.server.rpc.client.Client` to communicate with it. 17 | :class:`a3m.cli.client.wrapper.ClientWrapper` is a context manager that makes it 18 | easier to access both an embedded server and its client instance. 19 | 20 | For more details, see: https://gist.github.com/sevein/2e5cf115c153df1cfc24f0f9d67f6d2a. 21 | 22 | .. warning:: 23 | 24 | These APIs are still unstable, expect changes! 25 | 26 | The following is an example of a web application that uses the development kit 27 | to embed a3m and make it available to web clients. 28 | 29 | .. literalinclude:: ../examples/webapp.py 30 | 31 | 32 | gRPC API 33 | -------- 34 | 35 | Whether you are embedding a3m or communicating with remote instances, its gRPC 36 | API is the underlying communication system and you should be able to put it in 37 | practice given any of the languages supported by the `gRPC 38 | stack <https://grpc.io/docs/languages/>`_. 39 | 40 | gRPC uses Protocol Buffers as the Interface Definition Language (IDL) for 41 | describing both the service interface and the structure of the payload messages. 42 | 43 | So far the whole definition of messages and services fits in a single file that 44 | we share below. Writing your custom client isn't hard because the stubs are 45 | automatically generated. Alternatively, it is possible to use a client such as 46 | `grpcurl <https://github.com/fullstorydev/grpcurl>`_ which dynamically browses 47 | our service schema. 48 | 49 | .. _idl: 50 | 51 | Find the generated documentation of the a3m API at `buf.build/artefactual/a3m`_. 52 | 53 | Reference 54 | --------- 55 | 56 | .. autofunction:: a3m.server.runner.create_server 57 | 58 | .. autoclass:: a3m.server.runner.Server 59 | :undoc-members: 60 | 61 | .. autoclass:: a3m.server.rpc.client.Client 62 | :undoc-members: 63 | 64 | .. autoclass:: a3m.cli.client.wrapper.ClientWrapper 65 | 66 | 67 | ..
_`buf.build/artefactual/a3m`: https://buf.build/artefactual/a3m 68 | -------------------------------------------------------------------------------- /tests/client/fixtures/dublincore.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "main.dublincore", 5 | "fields": { 6 | "rights": "Public Domain", 7 | "publisher": "Tortall Press", 8 | "format": "parchement", 9 | "metadataappliestotype": "3e48343d-e2d2-4956-aaa3-b54d26eb9761", 10 | "creator": "Keladry of Mindelan", 11 | "language": "en", 12 | "type": "Archival Information Package", 13 | "description": "Glaives are cool", 14 | "title": "Yamani Weapons", 15 | "date": "2015", 16 | "relation": "None", 17 | "source": "Numair's library", 18 | "coverage": "", 19 | "contributor": "Yuki", 20 | "identifier": "42/1", 21 | "metadataappliestoidentifier": "8b891d7c-5bd2-4249-84a1-2f00f725b981", 22 | "subject": "Glaives", 23 | "status": "ORIGINAL" 24 | } 25 | }, 26 | { 27 | "pk": 2, 28 | "model": "main.dublincore", 29 | "fields": { 30 | "rights": "Public Domain", 31 | "publisher": "Tortall Press", 32 | "format": "palimpsest", 33 | "metadataappliestotype": "3e48343d-e2d2-4956-aaa3-b54d26eb9761", 34 | "creator": "Keladry of Mindelan", 35 | "language": "en", 36 | "type": "Archival Information Package", 37 | "description": "Glaives are awesome", 38 | "title": "Yamani Weapons", 39 | "date": "", 40 | "relation": "", 41 | "source": "", 42 | "coverage": "Partial", 43 | "contributor": "Yuki, Neal", 44 | "identifier": "42/1", 45 | "metadataappliestoidentifier": "5d78a2a5-57a6-430f-87b2-b89fb3ccb050", 46 | "subject": "Glaives", 47 | "status": "UPDATED" 48 | } 49 | }, 50 | { 51 | "pk": "3e48343d-e2d2-4956-aaa3-b54d26eb9761", 52 | "model": "main.metadataappliestotype", 53 | "fields": { 54 | "lastmodified": "2012-10-01T17:25:05Z", 55 | "replaces": null, 56 | "description": "SIP" 57 | } 58 | }, 59 | { 60 | "pk": "45696327-44c5-4e78-849b-e027a189bf4d", 61 | "model": 
"main.metadataappliestotype", 62 | "fields": { 63 | "lastmodified": "2012-10-01T17:25:05Z", 64 | "replaces": null, 65 | "description": "Transfer" 66 | } 67 | }, 68 | { 69 | "pk": "7f04d9d4-92c2-44a5-93dc-b7bfdf0c1f17", 70 | "model": "main.metadataappliestotype", 71 | "fields": { 72 | "lastmodified": "2012-10-01T17:25:05Z", 73 | "replaces": null, 74 | "description": "File" 75 | } 76 | } 77 | ] 78 | -------------------------------------------------------------------------------- /tests/client/test_validate_file.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from a3m.client.clientScripts.validate_file import main 4 | from a3m.client.job import Job 5 | from a3m.main.models import SIP 6 | from a3m.main.models import Event 7 | from a3m.main.models import File 8 | from a3m.main.models import FileFormatVersion 9 | 10 | 11 | @pytest.fixture 12 | def sip(tmp_path): 13 | sip_dir = tmp_path / "sip" 14 | sip_dir.mkdir() 15 | # Create logs directory in the SIP. 16 | (sip_dir / "logs").mkdir() 17 | 18 | return SIP.objects.create(currentpath=str(sip_dir)) 19 | 20 | 21 | @pytest.fixture 22 | def file_obj(tmp_path, sip): 23 | d = tmp_path / "dir" 24 | d.mkdir() 25 | txt_file = d / "file.txt" 26 | txt_file.write_text("hello world") 27 | 28 | f = File.objects.create( 29 | sip=sip, originallocation=txt_file, currentlocation=txt_file 30 | ) 31 | f.fileformatversion_set.create( 32 | # Known format version with validation rule. 
33 | format_version_id="45928c95-1fea-4b2b-af54-9aa3807e26a2", 34 | ) 35 | 36 | return f 37 | 38 | 39 | @pytest.fixture 40 | def file_format_version(file_obj, format_version): 41 | FileFormatVersion.objects.create(file_uuid=file_obj, format_version=format_version) 42 | 43 | 44 | @pytest.mark.django_db 45 | def test_main(mocker, sip, file_obj): 46 | exit_status = 0 47 | stdout = '{"eventOutcomeInformation": "pass", "eventOutcomeDetailNote": "a note"}' 48 | stderr = "" 49 | execute_or_run = mocker.patch( 50 | "a3m.client.clientScripts.validate_file.executeOrRun", 51 | return_value=(exit_status, stdout, stderr), 52 | ) 53 | job = mocker.Mock(spec=Job) 54 | file_type = "original" 55 | 56 | main( 57 | job=job, 58 | file_path=file_obj.currentlocation, 59 | file_uuid=file_obj.uuid, 60 | sip_uuid=sip.uuid, 61 | shared_path=sip.currentpath, 62 | file_type=file_type, 63 | ) 64 | 65 | # Check the executed script. 66 | called_args = execute_or_run.call_args 67 | assert called_args.kwargs["type"] == "pythonScript" 68 | assert called_args.kwargs["printing"] is False 69 | assert called_args.kwargs["arguments"] == [file_obj.currentlocation] 70 | 71 | # Verify a PREMIS validation event was created with the output of the 72 | # validation command. 
73 | assert ( 74 | Event.objects.filter( 75 | file_uuid=file_obj.uuid, 76 | event_type="validation", 77 | event_outcome="pass", 78 | event_outcome_detail="a note", 79 | ).count() 80 | == 1 81 | ) 82 | -------------------------------------------------------------------------------- /tests/common/fixtures/test-identifiers-MODS-METS.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Yamani Weapons 5 | 6 | 7 | Keladry of Mindelan 8 | 9 | Provenance 10 | 11 | 12 | 13 | 14 | Yuki 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | F_374_18 27 | 28475 28 | Yamani 29 | 30 | Glaive 18 31 | http://archives.tortall.gov/yamani/permalink/28475 32 | Public domain 33 | 34 | Researchers must contact the Royal University in Corus, Tortall for permission to use or reproduce. 35 | There are no restrictions on access. 36 | Yamani Islands 37 | 38 | 39 | 40 | 41 | Yamani 42 | 43 | Weapons 44 | 45 | 46 | 47 | 48 | Archival 49 | 50 | 400-450 51 | 52 | 53 | Photographs 54 | 55 | one photograph 56 | b&w 57 | 16 cm x 10 cm 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | eng 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | Server 5 | ------ 6 | 7 | a3m can be executed in server mode via **a3md**:: 8 | 9 | a3md 10 | 11 | It launches a gRPC server and several subsystems that enable processing. Use a 12 | service manager such as systemd to configure it as a service. 13 | 14 | .. note:: 15 | 16 | By default, **a3md** does not log messages with level ``DEBUG`` and 17 | generally tries to keep the log stream unobstructed unless human 18 | intervention is required. 19 | 20 | For debugging purposes, you can access all messages by setting the 21 | environment variable ``A3M_DEBUG=yes``.
22 | 23 | Client 24 | ------ 25 | 26 | **a3m** is the command-line interface that aims to provide a rich text-based 27 | user experience. It communicates with the server via gRPC. Use as follows:: 28 | 29 | a3m --address=127.0.0.1:7000 ~/Documents/pictures 30 | 31 | When the ``--address`` option is not included, **a3m** runs its own embedded 32 | instance of the server:: 33 | 34 | a3m ~/Documents/pictures 35 | 36 | Processing directory 37 | -------------------- 38 | 39 | a3m uses a processing directory to store its database and all created AIPs. 40 | If you are using Linux, this directory can be found under `~/.local/share/a3m` 41 | and these are its contents:: 42 | 43 | . 44 | ├── db.sqlite 45 | └── share 46 | ├── completed 47 | │   └── Test-fa1d6cb3-c1fd-4618-ba55-32f01fda8198.7z 48 | ├── currentlyProcessing 49 | │   ├── ingest 50 | │   └── transfer 51 | ├── failed 52 | │   ├── 0d117bed-2124-48a2-b9d7-f32514d39c1e 53 | ├── policies 54 | └── tmp 55 | 56 | 57 | Processing configuration 58 | ------------------------ 59 | 60 | a3m abandons the XML-based processing configuration document used by 61 | Archivematica. Instead, users are asked to submit the configuration as part 62 | of their transfer requests. 63 | 64 | With our client, ``--processing-config`` can be used multiple times to indicate 65 | the desired settings:: 66 | 67 | a3m --name="Test" --processing-config="normalize=no" http://... 68 | 69 | The Python client can do similarly:: 70 | 71 | from a3m.api.transferservice.v1beta1.request_response_pb2 import ( 72 | ProcessingConfig 73 | ) 74 | 75 | c = Client(...) 76 | c.submit( 77 | url="URL...", name="Name...", 78 | config=ProcessingConfig(normalize=False)) 79 | 80 | The full list of settings or their defaults are not described yet but it can be 81 | found in the `ProcessingConfig`_ message type of the API. 82 | 83 | .. 
_`ProcessingConfig`: https://buf.build/artefactual/a3m/docs/main:a3m.api.transferservice.v1beta1#a3m.api.transferservice.v1beta1.ProcessingConfig 84 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/copy_transfer_submission_documentation.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 
def call(jobs):
    """Copy each source transfer's submission documentation into the SIP.

    Per job, ``job.args`` provides the SIP UUID (1), the destination
    submission documentation directory (2) and the shared path (3). For
    every distinct transfer location of the SIP's non-removed files, the
    transfer's ``metadata/submissionDocumentation`` directory (under
    ``data/`` when the transfer is a bag) is copied to
    ``transfer-<transfer directory name>`` inside the destination.
    """
    for job in jobs:
        with job.JobContext():
            sip_uuid = job.args[1]
            destination_root = job.args[2]
            shared_path = job.args[3]

            sip_files = File.objects.filter(
                removedtime__isnull=True,
                sip_id=sip_uuid,
                transfer__currentlocation__isnull=False,
            )
            locations = sip_files.values_list(
                "transfer__currentlocation", flat=True
            ).distinct()

            for location in locations:
                transfer_name = os.path.basename(os.path.abspath(location))
                transfer_path = find_transfer_path_from_ingest(location, shared_path)
                job.pyprint("Transfer found in", transfer_path)

                dst = os.path.join(destination_root, "transfer-%s" % (transfer_name))
                # Bags keep their payload, metadata included, under data/.
                if is_bag(transfer_path):
                    src = os.path.join(
                        transfer_path, "data", "metadata", "submissionDocumentation"
                    )
                else:
                    src = os.path.join(
                        transfer_path, "metadata", "submissionDocumentation"
                    )
                job.pyprint(src, " -> ", dst)
                shutil.copytree(src, dst)
def recursivelyRemoveEmptyDirectories(job, dir):
    """Remove every empty directory below ``dir``, deepest first.

    ``dir`` itself is not removed. Directories that cannot be deleted (for
    example because they still hold files) are reported on stderr through
    ``job.pyprint``.

    Returns the number of directories that could not be removed.
    """
    error_count = 0
    # Bottom-up walk so children are removed before their parents are tried.
    for root, dirs, _files in os.walk(dir, topdown=False):
        for directory in dirs:
            try:
                os.rmdir(os.path.join(root, directory))
            except OSError as e:
                job.pyprint(
                    f"{directory} could not be deleted: {e.args}", file=sys.stderr
                )
                error_count += 1
    return error_count


def _sip_uuid_from_directory(sip_directory):
    """Return the 36-character UUID that ends the SIP directory name.

    Works with or without trailing slashes; the previous ``[-37:-1]`` slice
    was only correct when the path ended with exactly one slash.
    """
    return sip_directory.rstrip("/")[-36:]


def call(jobs):
    """Clean up ``objects/manualNormalization`` in each job's SIP directory.

    ``job.args[1]`` is the SIP directory. When the manual normalization
    directory exists, ``normalization.csv`` is deleted from disk and marked
    as removed in the database, then the now-empty directory tree is
    removed. The job status is the number of directories that could not be
    deleted.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                SIPDirectory = job.args[1]
                manual_normalization_dir = os.path.join(
                    SIPDirectory, "objects", "manualNormalization"
                )

                errorCount = 0

                if os.path.isdir(manual_normalization_dir):
                    # Delete normalization.csv if present
                    normalization_csv = os.path.join(
                        manual_normalization_dir, "normalization.csv"
                    )
                    if os.path.isfile(normalization_csv):
                        os.remove(normalization_csv)
                        # Need SIP UUID to get file UUID to remove file in DB
                        sipUUID = _sip_uuid_from_directory(SIPDirectory)

                        f = File.objects.get(
                            removedtime__isnull=True,
                            originallocation__endswith="normalization.csv",
                            sip_id=sipUUID,
                        )
                        databaseFunctions.fileWasRemoved(f.uuid)

                    # Recursively delete empty manual normalization dir
                    try:
                        errorCount += recursivelyRemoveEmptyDirectories(
                            job, manual_normalization_dir
                        )
                        os.rmdir(manual_normalization_dir)
                    except OSError as e:
                        job.pyprint(
                            f"{manual_normalization_dir} could not be deleted: {e.args}",
                            file=sys.stderr,
                        )
                        errorCount += 1

                job.set_status(errorCount)
logger = logging.getLogger(__name__)


def get_modification_date(file_path):
    """Return the file's mtime as a UTC-aware datetime, truncated to whole seconds."""
    whole_seconds = int(os.stat(file_path).st_mtime)
    return datetime.fromtimestamp(whole_seconds, tz=timezone.utc)


def main(transfer_uuid, shared_directory_path):
    """Save the on-disk modification time of every file of a transfer.

    File paths are resolved by replacing ``%transferDirectory%`` with the
    transfer's current location and ``%sharedPath%`` with
    ``shared_directory_path``. Files whose current location has no
    ``replace`` method (it raises ``AttributeError``) are skipped.
    """
    transfer = models.Transfer.objects.get(uuid=transfer_uuid)

    stored_count = 0
    for record in models.File.objects.filter(transfer=transfer):
        try:
            shared_relative = record.currentlocation.replace(
                "%transferDirectory%", transfer.currentlocation, 1
            )
        except AttributeError:
            logger.debug(
                "No modification date stored for file %s because it has no current location. It was probably a deleted compressed package.",
                record.uuid,
            )
            continue

        absolute_path = shared_relative.replace(
            "%sharedPath%", shared_directory_path, 1
        )
        record.modificationtime = get_modification_date(absolute_path)
        record.save()
        stored_count += 1

    logger.debug("Stored modification dates of %d files.", stored_count)


def call(jobs):
    """Run ``main`` for each job inside one transaction and set status 0.

    ``job.args[1]`` is the transfer UUID and ``job.args[2]`` the shared
    directory path.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext(logger=logger):
                main(job.args[1], job.args[2])
                job.set_status(0)
REQUIRED_DIRECTORIES = (
    "objects",
    "logs",
    "metadata",
    "metadata/submissionDocumentation",
)

ALLOWABLE_FILES: tuple[str, ...] = tuple()


def verifyDirectoriesExist(job, SIPDir, ret=0):
    """Add to ``ret`` one error per required directory missing from ``SIPDir``."""
    missing = [
        name
        for name in REQUIRED_DIRECTORIES
        if not os.path.isdir(os.path.join(SIPDir, name))
    ]
    for name in missing:
        job.pyprint("Required Directory Does Not Exist: " + name, file=sys.stderr)
    return ret + len(missing)


def verifyNothingElseAtTopLevel(job, SIPDir, ret=0):
    """Add to ``ret`` one error per unexpected top-level entry of ``SIPDir``.

    Directories must be listed in ``REQUIRED_DIRECTORIES``; anything else
    must be listed in ``ALLOWABLE_FILES``.
    """
    for entry in os.listdir(SIPDir):
        if os.path.isdir(os.path.join(SIPDir, entry)):
            allowed, message = REQUIRED_DIRECTORIES, "Error, directory exists: "
        else:
            allowed, message = ALLOWABLE_FILES, "Error, file exists: "
        if entry in allowed:
            continue
        job.pyprint(message + entry, file=sys.stderr)
        ret += 1
    return ret


def verifyThereAreFiles(job, SIPDir, ret=0):
    """Make sure there are files in the transfer."""
    for _root, _dirs, files in os.walk(SIPDir):
        if files:
            return ret
    job.pyprint("Error, no files found", file=sys.stderr)
    return ret + 1


def call(jobs):
    """Run every compliance check on ``job.args[1]`` and set the error count as status.

    When any check fails the job sleeps for ten seconds before the status
    is set.
    """
    for job in jobs:
        with job.JobContext():
            SIPDir = job.args[1]
            ret = 0
            checks = (
                verifyDirectoriesExist,
                verifyNothingElseAtTopLevel,
                checkDirectory,
                verifyThereAreFiles,
            )
            for check in checks:
                ret = check(job, SIPDir, ret)
            if ret != 0:
                import time

                time.sleep(10)
            job.set_status(ret)
@pytest.fixture
def simple_job(request, mocker):
    """Return a ``MockJob`` named ``test_v0.0`` built from three mocks."""
    return MockJob(mocker.Mock(), mocker.Mock(), mocker.Mock(), name="test_v0.0")


@pytest.fixture
def simple_task(request):
    """Return a ``Task`` for ``command`` that does not want its output."""
    return Task(
        "command",
        "a argument string",
        "/dev/stdoutfile",
        "/tmp/stderrfile",
        {r"%relativeLocation%": "testfile"},
        wants_output=False,
    )


def format_result(task_results):
    """Accepts task results as a tuple of (uuid, result_dict)."""
    response = {"task_results": {}}
    for task_uuid, task_data in task_results:
        # Keys of the response are the string form of the task UUID.
        task_uuid = str(task_uuid)
        response["task_results"][task_uuid] = task_data

    return response


# test_gearman_task_submission
# test_gearman_task_result_success
# test_gearman_task_result_error


def test_multiple_batches(simple_job, simple_task, mocker):
    """Three tasks with a batch size of two are executed in two batches."""
    mocker.patch("a3m.server.tasks.backends.pool_backend.Task.bulk_log")
    mocker.patch("a3m.server.tasks.backends.pool_backend.Task.write_output")
    mocker.patch("a3m.server.tasks.backends.pool_backend.init_counter_labels")
    # Shrink the batch size so that three tasks need more than one batch.
    mocker.patch.object(TaskBackend, "TASK_BATCH_SIZE", 2)

    def execute_command(task_name: str, batch_payload):
        # Report success for every task contained in the batch.
        assert task_name == "test_v0.0"
        return {
            "task_results": {
                task_id: {
                    "exitCode": 0,
                    "stdout": "stdout example",
                    "stderr": "stderr example",
                }
                for task_id, task in batch_payload["tasks"].items()
            }
        }

    # Rebind the name to the mock so that the call count can be inspected.
    execute_command = mocker.patch(
        "a3m.server.tasks.backends.pool_backend.execute_command",
        side_effect=execute_command,
    )

    backend = get_task_backend()

    for item in range(3):
        backend.submit_task(simple_job, simple_task)

    results = list(backend.wait_for_results(simple_job))
    assert execute_command.call_count == 2
    assert len(results) == 3
    assert results[0].done is True
    assert results[0].exit_code == 0
    assert results[1].done is True
    assert results[1].exit_code == 0
    assert results[2].done is True
    assert results[2].exit_code == 0
REQUIRED_DIRECTORIES = (
    "objects",
    "logs",
    "metadata",
    "metadata/submissionDocumentation",
)

ALLOWABLE_FILES = ()


def checkDirectory(job, directory, ret=0):
    """Walk ``directory`` and add one error to ``ret`` if the walk raises.

    Every file path is joined but the result is discarded; only an
    exception during traversal counts as a failure.
    """
    current = directory
    try:
        for current, _subdirs, names in os.walk(directory):
            for name in names:
                os.path.join(current, name)
    except Exception as error:
        # ``current`` is the last directory reached before the failure.
        job.pyprint("Error navigating directory:", str(current), file=sys.stderr)
        job.pyprint(type(error), file=sys.stderr)
        job.pyprint(error.args, file=sys.stderr)
        ret += 1
    return ret


def verifyDirectoriesExist(job, SIPDir, ret=0):
    """Add to ``ret`` one error per required directory missing from ``SIPDir``."""
    for required in REQUIRED_DIRECTORIES:
        if os.path.isdir(os.path.join(SIPDir, required)):
            continue
        job.pyprint("Required Directory Does Not Exist: " + required, file=sys.stderr)
        ret += 1
    return ret


def verifyNothingElseAtTopLevel(job, SIPDir, ret=0):
    """Add to ``ret`` one error per unexpected top-level entry of ``SIPDir``."""
    for entry in os.listdir(SIPDir):
        is_directory = os.path.isdir(os.path.join(SIPDir, entry))
        if is_directory and entry not in REQUIRED_DIRECTORIES:
            job.pyprint("Error, directory exists: " + entry, file=sys.stderr)
            ret += 1
        elif not is_directory and entry not in ALLOWABLE_FILES:
            job.pyprint("Error, file exists: " + entry, file=sys.stderr)
            ret += 1
    return ret


def call(jobs):
    """Run the SIP compliance checks on ``job.args[1]`` for each job.

    The accumulated error count becomes the job status; a failing job
    sleeps for ten seconds before the status is set.
    """
    for job in jobs:
        with job.JobContext():
            SIPDir = job.args[1]
            ret = 0
            for check in (
                verifyDirectoriesExist,
                verifyNothingElseAtTopLevel,
                checkDirectory,
            ):
                ret = check(job, SIPDir, ret)
            if ret != 0:
                import time

                time.sleep(10)
            job.set_status(ret)

            # NOTE(review): the nesting level of this call could not be
            # recovered from the flattened source; confirm it is meant to
            # run once per job.
            metrics.sip_started()
import tempfile 4 | 5 | from django.test import TestCase 6 | 7 | from a3m.client.clientScripts import store_file_modification_dates 8 | from a3m.main import models 9 | 10 | THIS_DIR = os.path.dirname(__file__) 11 | 12 | 13 | class TestStoreFileModification(TestCase): 14 | """Test store_file_modification_dates.""" 15 | 16 | fixture_files = ["transfer.json", "files-transfer-unicode.json"] 17 | fixtures = [os.path.join(THIS_DIR, "fixtures", p) for p in fixture_files] 18 | 19 | transfer_uuid = "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6" 20 | temp_dir = tempfile.mkdtemp() 21 | 22 | def tearDown(self): 23 | transfer = models.Transfer.objects.get(uuid=self.transfer_uuid) 24 | transfer_path = transfer.currentlocation.replace( 25 | "%sharedPath%", self.temp_dir + "/" 26 | ) 27 | shutil.rmtree(transfer_path) 28 | 29 | def test_store_file_modification_dates(self): 30 | """Test store_file_modification_dates. 31 | 32 | It should store file modification dates. 33 | """ 34 | 35 | # Create files 36 | transfer = models.Transfer.objects.get(uuid=self.transfer_uuid) 37 | transfer_path = transfer.currentlocation.replace( 38 | "%sharedPath%", self.temp_dir + "/" 39 | ) 40 | transfer.save() 41 | 42 | for f in models.File.objects.filter(transfer_id=self.transfer_uuid): 43 | path = f.currentlocation.replace("%transferDirectory%", transfer_path) 44 | dirname = os.path.dirname(path) 45 | if not os.path.exists(dirname): 46 | os.makedirs(dirname) 47 | with open(path, "w") as f: 48 | f.write(path) 49 | os.utime(path, (1339485682, 1339485682)) 50 | 51 | # Store file modification dates 52 | store_file_modification_dates.main(self.transfer_uuid, self.temp_dir + "/") 53 | 54 | # Assert files have expected modification times 55 | assert ( 56 | str( 57 | models.File.objects.get( 58 | pk="47813453-6872-442b-9d65-6515be3c5aa1" 59 | ).modificationtime 60 | ) 61 | == "2012-06-12 07:21:22+00:00" 62 | ) 63 | assert ( 64 | str( 65 | models.File.objects.get( 66 | pk="60e5c61b-14ef-4e92-89ec-9b9201e68adb" 67 | 
).modificationtime 68 | ) 69 | == "2012-06-12 07:21:22+00:00" 70 | ) 71 | assert ( 72 | str( 73 | models.File.objects.get( 74 | pk="791e07ea-ad44-4315-b55b-44ec771e95cf" 75 | ).modificationtime 76 | ) 77 | == "2012-06-12 07:21:22+00:00" 78 | ) 79 | assert ( 80 | str( 81 | models.File.objects.get( 82 | pk="8a1f0b59-cf94-47ef-8078-647b77c8a147" 83 | ).modificationtime 84 | ) 85 | == "2012-06-12 07:21:22+00:00" 86 | ) 87 | -------------------------------------------------------------------------------- /a3m/api/transferservice/v1beta1/service_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: a3m/api/transferservice/v1beta1/service.proto 5 | # Protobuf Python Version: 5.28.2 6 | """Generated protocol buffer code.""" 7 | 8 | from google.protobuf import descriptor as _descriptor 9 | from google.protobuf import descriptor_pool as _descriptor_pool 10 | from google.protobuf import runtime_version as _runtime_version 11 | from google.protobuf import symbol_database as _symbol_database 12 | from google.protobuf.internal import builder as _builder 13 | 14 | _runtime_version.ValidateProtobufRuntimeVersion( 15 | _runtime_version.Domain.PUBLIC, 16 | 5, 17 | 28, 18 | 2, 19 | "", 20 | "a3m/api/transferservice/v1beta1/service.proto", 21 | ) 22 | # @@protoc_insertion_point(imports) 23 | 24 | _sym_db = _symbol_database.Default() 25 | 26 | 27 | from a3m.api.transferservice.v1beta1 import ( 28 | request_response_pb2 as a3m_dot_api_dot_transferservice_dot_v1beta1_dot_request__response__pb2, 29 | ) 30 | 31 | 32 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( 33 | 
b'\n-a3m/api/transferservice/v1beta1/service.proto\x12\x1f\x61\x33m.api.transferservice.v1beta1\x1a\x36\x61\x33m/api/transferservice/v1beta1/request_response.proto2\xc5\x03\n\x0fTransferService\x12k\n\x06Submit\x12..a3m.api.transferservice.v1beta1.SubmitRequest\x1a/.a3m.api.transferservice.v1beta1.SubmitResponse"\x00\x12\x65\n\x04Read\x12,.a3m.api.transferservice.v1beta1.ReadRequest\x1a-.a3m.api.transferservice.v1beta1.ReadResponse"\x00\x12t\n\tListTasks\x12\x31.a3m.api.transferservice.v1beta1.ListTasksRequest\x1a\x32.a3m.api.transferservice.v1beta1.ListTasksResponse"\x00\x12h\n\x05\x45mpty\x12-.a3m.api.transferservice.v1beta1.EmptyRequest\x1a..a3m.api.transferservice.v1beta1.EmptyResponse"\x00\x42\xa9\x02\n#com.a3m.api.transferservice.v1beta1B\x0cServiceProtoP\x01ZUgithub.com/artefactual-labs/a3m/proto/a3m/api/transferservice/v1beta1;transferservice\xa2\x02\x03\x41\x41T\xaa\x02\x1f\x41\x33m.Api.Transferservice.V1beta1\xca\x02\x1f\x41\x33m\\Api\\Transferservice\\V1beta1\xe2\x02+A3m\\Api\\Transferservice\\V1beta1\\GPBMetadata\xea\x02"A3m::Api::Transferservice::V1beta1b\x06proto3' 34 | ) 35 | 36 | _globals = globals() 37 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 38 | _builder.BuildTopDescriptorsAndMessages( 39 | DESCRIPTOR, "a3m.api.transferservice.v1beta1.service_pb2", _globals 40 | ) 41 | if not _descriptor._USE_C_DESCRIPTORS: 42 | _globals["DESCRIPTOR"]._loaded_options = None 43 | _globals[ 44 | "DESCRIPTOR" 45 | ]._serialized_options = b'\n#com.a3m.api.transferservice.v1beta1B\014ServiceProtoP\001ZUgithub.com/artefactual-labs/a3m/proto/a3m/api/transferservice/v1beta1;transferservice\242\002\003AAT\252\002\037A3m.Api.Transferservice.V1beta1\312\002\037A3m\\Api\\Transferservice\\V1beta1\342\002+A3m\\Api\\Transferservice\\V1beta1\\GPBMetadata\352\002"A3m::Api::Transferservice::V1beta1' 46 | _globals["_TRANSFERSERVICE"]._serialized_start = 139 47 | _globals["_TRANSFERSERVICE"]._serialized_end = 592 48 | # @@protoc_insertion_point(module_scope) 
49 | -------------------------------------------------------------------------------- /tests/common/fixtures/test_find_by_id_refid.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: password=admin 4 | headers: 5 | Accept: ['*/*'] 6 | Accept-Encoding: ['gzip, deflate'] 7 | Connection: [keep-alive] 8 | Content-Length: ['14'] 9 | Content-Type: [application/x-www-form-urlencoded] 10 | User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] 11 | method: POST 12 | uri: http://localhost:8089/users/admin/login 13 | response: 14 | body: {string: !!python/unicode '{"session":"a7044300d27191551e5940423d1c0e9fb7a49354cab8d986eddcb1e940b9e6b4","user":{"lock_version":1508,"username":"admin","name":"Administrator","is_system_user":true,"create_time":"2014-12-05T20:32:17Z","system_mtime":"2015-09-23T00:07:23Z","user_mtime":"2015-09-23T00:07:23Z","jsonmodel_type":"user","groups":[],"is_admin":false,"uri":"/users/1","agent_record":{"ref":"/agents/people/1"},"permissions":{"/repositories/2":["view_repository","update_accession_record","update_resource_record","update_digital_object_record"],"_archivesspace":[]}}} 15 | 16 | '} 17 | headers: 18 | cache-control: ['private, must-revalidate, max-age=0'] 19 | content-length: ['551'] 20 | content-type: [application/json] 21 | date: ['Wed, 23 Sep 2015 00:07:23 GMT'] 22 | server: [Jetty(8.1.5.v20120716)] 23 | x-content-type-options: [nosniff] 24 | status: {code: 200, message: OK} 25 | - request: 26 | body: null 27 | headers: 28 | Accept: ['*/*'] 29 | Accept-Encoding: ['gzip, deflate'] 30 | Connection: [keep-alive] 31 | User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] 32 | X-ArchivesSpace-Session: [!!python/unicode 'a7044300d27191551e5940423d1c0e9fb7a49354cab8d986eddcb1e940b9e6b4'] 33 | method: GET 34 | uri: 
http://localhost:8089/repositories/2/find_by_id/archival_objects?resolve%5B%5D=archival_objects&ref_id%5B%5D=a118514fab1b2ee6a7e9ad259e1de355 35 | response: 36 | body: {string: !!python/unicode '{"archival_objects":[{"ref":"/repositories/2/archival_objects/752250","_resolved":{"lock_version":0,"position":0,"publish":true,"ref_id":"a118514fab1b2ee6a7e9ad259e1de355","component_id":"test111","title":"Test 37 | AO","display_string":"Test AO","restrictions_apply":false,"created_by":"admin","last_modified_by":"admin","create_time":"2015-09-22T18:35:41Z","system_mtime":"2015-09-22T18:35:41Z","user_mtime":"2015-09-22T18:35:41Z","suppressed":false,"level":"file","jsonmodel_type":"archival_object","external_ids":[],"subjects":[],"linked_events":[],"extents":[],"dates":[],"external_documents":[],"rights_statements":[],"linked_agents":[],"instances":[],"notes":[],"uri":"/repositories/2/archival_objects/752250","repository":{"ref":"/repositories/2"},"resource":{"ref":"/repositories/2/resources/11319"},"has_unpublished_ancestor":false}}]} 38 | 39 | '} 40 | headers: 41 | cache-control: ['private, must-revalidate, max-age=0'] 42 | content-length: ['841'] 43 | content-type: [application/json] 44 | date: ['Wed, 23 Sep 2015 00:07:23 GMT'] 45 | server: [Jetty(8.1.5.v20120716)] 46 | x-content-type-options: [nosniff] 47 | status: {code: 200, message: OK} 48 | version: 1 49 | -------------------------------------------------------------------------------- /a3m/server/jobs/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | A base class for other Job types to inherit from. 
3 | """ 4 | 5 | import abc 6 | import logging 7 | import uuid 8 | 9 | from django.utils import timezone 10 | 11 | from a3m.main import models 12 | from a3m.server.db import auto_close_old_connections 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class Job(metaclass=abc.ABCMeta): 18 | """ 19 | A single job, corresponding to a workflow link, and the `Job` model in the 20 | database. 21 | 22 | Subclasses must implement a `run` method; it will be called in a thread via 23 | `executor.submit`, and should return the next job to be processed. 24 | """ 25 | 26 | # Mirror job model statuses, so that we can mostly avoid referencing 27 | # the job model 28 | STATUSES = models.Job.STATUS 29 | STATUS_UNKNOWN = models.Job.STATUS_UNKNOWN 30 | STATUS_COMPLETED_SUCCESSFULLY = models.Job.STATUS_COMPLETED_SUCCESSFULLY 31 | STATUS_EXECUTING_COMMANDS = models.Job.STATUS_EXECUTING_COMMANDS 32 | STATUS_FAILED = models.Job.STATUS_FAILED 33 | 34 | def __init__(self, job_chain, link, package): 35 | self.uuid = uuid.uuid4() 36 | self.job_chain = job_chain 37 | self.package = package 38 | self.link = link 39 | self.created_at = timezone.now() 40 | self.group = link.get_label("group", "en") 41 | self.description = link.get_label("description", "en") 42 | 43 | # always zero for non task jobs 44 | self.exit_code = 0 45 | 46 | @classmethod 47 | @auto_close_old_connections() 48 | def cleanup_old_db_entries(cls): 49 | """Update the status of any in progress jobs. 50 | 51 | This command is run on startup. 52 | TODO: we could try to recover, instead of just failing. 53 | """ 54 | models.Job.objects.filter(currentstep=cls.STATUS_EXECUTING_COMMANDS).update( 55 | currentstep=cls.STATUS_FAILED 56 | ) 57 | 58 | @abc.abstractmethod 59 | def run(self): 60 | """ 61 | Run the actual job. 62 | 63 | This method is executed via ThreadPoolExecutor and returns the _next_ job 64 | to process. 
65 | """ 66 | 67 | @auto_close_old_connections() 68 | def save_to_db(self): 69 | return models.Job.objects.create( 70 | jobuuid=self.uuid, 71 | jobtype=self.description, 72 | directory=self.package.current_path_for_db, 73 | sipuuid=self.package.subid, 74 | currentstep=self.STATUS_EXECUTING_COMMANDS, 75 | unittype=self.package.unit_type, 76 | microservicegroup=self.group, 77 | createdtime=self.created_at, 78 | createdtimedec=float(self.created_at.strftime("0.%f")), 79 | microservicechainlink=self.link.id, 80 | ) 81 | 82 | @auto_close_old_connections() 83 | def mark_complete(self): 84 | logger.debug( 85 | "%s %s done with exit code %s", 86 | self.__class__.__name__, 87 | self.uuid, 88 | self.exit_code, 89 | ) 90 | return models.Job.objects.filter(jobuuid=self.uuid).update( 91 | currentstep=self.STATUS_COMPLETED_SUCCESSFULLY 92 | ) 93 | -------------------------------------------------------------------------------- /tests/client/fixtures/custom_structmaps/custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929/objects/metadata/transfers/custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51/missing_contentid.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/common/test_env_configparser.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os 3 | from io import StringIO 4 | 5 | import pytest 6 | from django.test import TestCase 7 | 8 | from a3m.appconfig import EnvConfigParser 9 | 10 | 11 | class TestConfigReader(TestCase): 12 | def setUp(self): 13 | """ 14 | Make sure 
that we are not mutating the global environment. `os.environ` 15 | is an instance of `os._Environ` which implements a `copy` method. 16 | """ 17 | self.environ = os.environ.copy() 18 | 19 | def tearDown(self): 20 | self.environ = None 21 | 22 | def read_test_config(self, test_config, prefix=""): 23 | buf = StringIO(test_config) 24 | config = EnvConfigParser(env=self.environ, prefix=prefix) 25 | config.read_file(buf) 26 | return config 27 | 28 | def test_env_lookup_int(self): 29 | """ 30 | Note that the environment precedes the configuration. 31 | """ 32 | self.environ["ARCHIVEMATICA_NICESERVICE_QUEUE_MAX_SIZE"] = "100" 33 | config = self.read_test_config( 34 | prefix="ARCHIVEMATICA_NICESERVICE", 35 | test_config=""" 36 | [queue] 37 | max_size = 500 38 | """, 39 | ) 40 | assert config.getint("queue", "max_size") == 100 41 | 42 | def test_env_lookup_nosection_bool(self): 43 | """ 44 | The environment string matches the option even though the corresponding 45 | section was not included. 46 | """ 47 | self.environ["ARCHIVEMATICA_NICESERVICE_TLS"] = "off" 48 | config = self.read_test_config( 49 | prefix="ARCHIVEMATICA_NICESERVICE", 50 | test_config=""" 51 | [network] 52 | tls = on 53 | """, 54 | ) 55 | assert config.getboolean("network", "tls") is False 56 | 57 | def test_unknown_section(self): 58 | """ 59 | Confirm that `EnvConfigParser` throws a `NoSectionError` exception 60 | when undefined. 61 | """ 62 | config = self.read_test_config( 63 | """ 64 | [main] 65 | foo = bar 66 | """ 67 | ) 68 | with pytest.raises(configparser.NoSectionError): 69 | assert config.get("undefined_section", "foo") 70 | 71 | def test_unknown_option(self): 72 | """ 73 | Confirm that `EnvConfigParser` throws a `NoOptionError` exception 74 | when undefined. 
75 | """ 76 | config = self.read_test_config( 77 | """ 78 | [main] 79 | foo = bar 80 | """ 81 | ) 82 | with pytest.raises(configparser.NoOptionError): 83 | assert config.get("main", "undefined_option") 84 | 85 | def test_unknown_option_with_fallback(self): 86 | """ 87 | A fallback keyword argument can be used to obtain a value from the 88 | configuration even if it's undefined. 89 | """ 90 | config = self.read_test_config( 91 | """ 92 | [main] 93 | foo = bar 94 | """ 95 | ) 96 | assert config.getboolean("main", "undefined_option", fallback=True) is True 97 | assert ( 98 | config.getint("undefined_section", "undefined_option", fallback=12345) 99 | == 12345 100 | ) 101 | -------------------------------------------------------------------------------- /tests/client/fixtures/events-transfer.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "fields": { 4 | "event_type": "ingestion", 5 | "event_id": "df02faa6-ab9d-42a5-9857-7b1c11fcfbb7", 6 | "event_detail": "", 7 | "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", 8 | "event_outcome_detail": "", 9 | "agents": [1, 2, 3], 10 | "event_outcome": "", 11 | "event_datetime": "2015-11-30T21:55:35Z" 12 | }, 13 | "model": "main.event", 14 | "pk": 1 15 | }, 16 | { 17 | "fields": { 18 | "event_type": "message digest calculation", 19 | "event_id": "7f2831f3-45f2-4184-86f2-77d0aa9fa473", 20 | "event_detail": "program=\"python\"; module=\"hashlib.sha256()\"", 21 | "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", 22 | "event_outcome_detail": "d2bed92b73c7090bb30a0b30016882e7069c437488e1513e9deaacbe29d38d92", 23 | "agents": [1, 2, 3], 24 | "event_outcome": "", 25 | "event_datetime": "2015-11-30T21:55:36Z" 26 | }, 27 | "model": "main.event", 28 | "pk": 2 29 | }, 30 | { 31 | "fields": { 32 | "event_type": "virus check", 33 | "event_id": "15f71683-4aae-4723-be31-eec0ba15c63d", 34 | "event_detail": "program=\"Clam AV\"; version=\"ClamAV 0.98.7\"; virusDefinitions=\"21117/Mon Nov 30 
09:32:13 2015\n\"", 35 | "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", 36 | "event_outcome_detail": "", 37 | "agents": [1, 2, 3], 38 | "event_outcome": "Pass", 39 | "event_datetime": "2015-11-30T21:55:38Z" 40 | }, 41 | "model": "main.event", 42 | "pk": 3 43 | }, 44 | { 45 | "fields": { 46 | "event_type": "filename change", 47 | "event_id": "870d4233-efa9-4360-a6f1-5e4b988efe5d", 48 | "event_detail": "prohibited characters removed:program=\"change_names\"; version=\"1.10.9529a554732f6b96a561fd0adcf2711bb233166b\"", 49 | "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", 50 | "event_outcome_detail": "Original name=\"%transferDirectory%objects/evelyn's photo.jpg\"; cleaned up name=\"%transferDirectory%objects/evelyn_s_photo.jpg\"", 51 | "agents": [1, 2, 3], 52 | "event_outcome": "", 53 | "event_datetime": "2015-11-30T21:55:39Z" 54 | }, 55 | "model": "main.event", 56 | "pk": 4 57 | }, 58 | { 59 | "fields": { 60 | "event_type": "format identification", 61 | "event_id": "a85e12d5-dafa-4a84-8270-3768e9a113b2", 62 | "event_detail": "program=\"Fido\"; version=\"1\"", 63 | "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", 64 | "event_outcome_detail": "fmt/44", 65 | "agents": [1, 2, 3], 66 | "event_outcome": "Positive", 67 | "event_datetime": "2015-11-30T21:55:40Z" 68 | }, 69 | "model": "main.event", 70 | "pk": 5 71 | }, 72 | { 73 | "fields": { 74 | "event_type": "validation", 75 | "event_id": "e631350b-f16c-41cf-8939-bc8afcb13151", 76 | "event_detail": "program=\"JHOVE\"; version=\"1.6\"", 77 | "file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f", 78 | "event_outcome_detail": "format=\"JPEG\"; version=\"1.02\"; result=\"Well-Formed and valid\"", 79 | "agents": [1, 2, 3], 80 | "event_outcome": "pass", 81 | "event_datetime": "2015-11-30T21:55:44Z" 82 | }, 83 | "model": "main.event", 84 | "pk": 6 85 | } 86 | ] 87 | -------------------------------------------------------------------------------- /proto/a3m/api/transferservice/v1beta1/request_response.proto: 
-------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package a3m.api.transferservice.v1beta1; 4 | 5 | option go_package = "github.com/artefactual-labs/a3m/proto/a3m/api/transferservice/v1beta1;transferservice"; 6 | 7 | import "google/protobuf/timestamp.proto"; 8 | 9 | message SubmitRequest { 10 | string name = 1; 11 | string url = 2; 12 | ProcessingConfig config = 3; 13 | } 14 | 15 | message SubmitResponse { 16 | string id = 1; 17 | } 18 | 19 | message ReadRequest { 20 | string id = 1; 21 | } 22 | 23 | message ReadResponse { 24 | PackageStatus status = 1; 25 | string job = 2; 26 | repeated Job jobs = 3; 27 | } 28 | 29 | message ListTasksRequest { 30 | string job_id = 1; 31 | } 32 | 33 | message ListTasksResponse { 34 | repeated Task tasks = 1; 35 | } 36 | 37 | message EmptyRequest { 38 | } 39 | 40 | message EmptyResponse { 41 | } 42 | 43 | enum PackageStatus { 44 | PACKAGE_STATUS_UNSPECIFIED = 0; 45 | PACKAGE_STATUS_FAILED = 1; 46 | PACKAGE_STATUS_REJECTED = 2; 47 | PACKAGE_STATUS_COMPLETE = 3; 48 | PACKAGE_STATUS_PROCESSING = 4; 49 | } 50 | 51 | message Job { 52 | string id = 1; 53 | string name = 2; 54 | string group = 3; 55 | string link_id = 4; 56 | 57 | enum Status { 58 | STATUS_UNSPECIFIED = 0; 59 | STATUS_COMPLETE = 1; 60 | STATUS_PROCESSING = 2; 61 | STATUS_FAILED = 3; 62 | } 63 | 64 | Status status = 5; 65 | google.protobuf.Timestamp start_time = 6; 66 | } 67 | 68 | message Task { 69 | string id = 1; 70 | string file_id = 2; 71 | int32 exit_code = 3; 72 | string filename = 4; 73 | string execution = 5; 74 | string arguments = 6; 75 | string stdout = 7; 76 | string stderr = 8; 77 | google.protobuf.Timestamp start_time = 9; 78 | google.protobuf.Timestamp end_time = 10; 79 | } 80 | 81 | message ProcessingConfig { 82 | bool assign_uuids_to_directories = 1; 83 | bool examine_contents = 2; 84 | bool generate_transfer_structure_report = 3; 85 | bool document_empty_directories = 4; 86 | bool 
extract_packages = 5; 87 | bool delete_packages_after_extraction = 6; 88 | bool identify_transfer = 7; 89 | // identify_submission_and_metadata represents a single configuration 90 | // attribute that controls two separate file format identification jobs 91 | // in the workflow: one for objects/submissionDocumentation and one 92 | // for objects/metadata 93 | bool identify_submission_and_metadata = 8; 94 | bool identify_before_normalization = 9; 95 | bool normalize = 10; 96 | bool transcribe_files = 11; 97 | bool perform_policy_checks_on_originals = 12; 98 | bool perform_policy_checks_on_preservation_derivatives = 13; 99 | 100 | // AIP compression level (1 is the fastest, 9 is the smallest). 101 | int32 aip_compression_level = 14; 102 | 103 | // AIP compression algorithm 104 | AIPCompressionAlgorithm aip_compression_algorithm = 15; 105 | 106 | enum AIPCompressionAlgorithm { 107 | AIP_COMPRESSION_ALGORITHM_UNSPECIFIED = 0; 108 | AIP_COMPRESSION_ALGORITHM_UNCOMPRESSED = 1; // It breaks in verify_aip. 109 | AIP_COMPRESSION_ALGORITHM_TAR = 2; // Not supported yet! 
110 | AIP_COMPRESSION_ALGORITHM_TAR_BZIP2 = 3; 111 | AIP_COMPRESSION_ALGORITHM_TAR_GZIP = 4; 112 | AIP_COMPRESSION_ALGORITHM_S7_COPY = 5; 113 | AIP_COMPRESSION_ALGORITHM_S7_BZIP2 = 6; 114 | AIP_COMPRESSION_ALGORITHM_S7_LZMA = 7; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /tests/client/fixtures/files-transfer-unicode.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "fields": { 4 | "filegrpuuid": "", 5 | "sip": null, 6 | "checksumtype": "sha256", 7 | "originallocation": "%transferDirectory%objects/\u305f\u304f\u3055\u3093 directories/need name change/checking here/ev\u00e9lyn's photo.jpg", 8 | "transfer": "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6", 9 | "filegrpuse": "original", 10 | "removedtime": null, 11 | "label": "", 12 | "checksum": "d2bed92b73c7090bb30a0b30016882e7069c437488e1513e9deaacbe29d38d92", 13 | "enteredsystem": "2017-01-04T19:35:20Z", 14 | "modificationtime": "2017-01-04T19:35:20Z", 15 | "currentlocation": "%transferDirectory%objects/\u305f\u304f\u3055\u3093 directories/need name change/checking here/ev\u00e9lyn's photo.jpg", 16 | "size": 158131 17 | }, 18 | "model": "main.file", 19 | "pk": "47813453-6872-442b-9d65-6515be3c5aa1" 20 | }, 21 | { 22 | "fields": { 23 | "filegrpuuid": "", 24 | "sip": null, 25 | "checksumtype": "sha256", 26 | "originallocation": "%transferDirectory%objects/no_name_change/needed_here/lion.svg", 27 | "transfer": "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6", 28 | "filegrpuse": "original", 29 | "removedtime": null, 30 | "label": "", 31 | "checksum": "f78615cd834f7fb84832177e73f13e3479f5b5b22ae7a9506c7fa0a14fd9df9e", 32 | "enteredsystem": "2017-01-04T19:35:20Z", 33 | "modificationtime": "2017-01-04T19:35:20Z", 34 | "currentlocation": "%transferDirectory%objects/no_name_change/needed_here/lion.svg", 35 | "size": 18324 36 | }, 37 | "model": "main.file", 38 | "pk": "60e5c61b-14ef-4e92-89ec-9b9201e68adb" 39 | }, 40 | { 41 | 
"fields": { 42 | "filegrpuuid": "", 43 | "sip": null, 44 | "checksumtype": "sha256", 45 | "originallocation": "%transferDirectory%objects/\u305f\u304f\u3055\u3093 directories/need name change/checking here/lion\u5199\u771f.svg", 46 | "transfer": "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6", 47 | "filegrpuse": "original", 48 | "removedtime": null, 49 | "label": "", 50 | "checksum": "f78615cd834f7fb84832177e73f13e3479f5b5b22ae7a9506c7fa0a14fd9df9e", 51 | "enteredsystem": "2017-01-04T19:35:20Z", 52 | "modificationtime": "2017-01-04T19:35:20Z", 53 | "currentlocation": "%transferDirectory%objects/\u305f\u304f\u3055\u3093 directories/need name change/checking here/lion\u5199\u771f.svg", 54 | "size": 18324 55 | }, 56 | "model": "main.file", 57 | "pk": "791e07ea-ad44-4315-b55b-44ec771e95cf" 58 | }, 59 | { 60 | "fields": { 61 | "filegrpuuid": "", 62 | "sip": null, 63 | "checksumtype": "sha256", 64 | "originallocation": "%transferDirectory%objects/has space/lion.svg", 65 | "transfer": "e95ab50f-9c84-45d5-a3ca-1b0b3f58d9b6", 66 | "filegrpuse": "original", 67 | "removedtime": null, 68 | "label": "", 69 | "checksum": "f78615cd834f7fb84832177e73f13e3479f5b5b22ae7a9506c7fa0a14fd9df9e", 70 | "enteredsystem": "2017-01-04T19:35:20Z", 71 | "modificationtime": "2017-01-04T19:35:20Z", 72 | "currentlocation": "%transferDirectory%objects/has space/lion.svg", 73 | "size": 18324 74 | }, 75 | "model": "main.file", 76 | "pk": "8a1f0b59-cf94-47ef-8078-647b77c8a147" 77 | } 78 | ] 79 | -------------------------------------------------------------------------------- /tests/test_registry.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from importlib.resources import files 3 | 4 | import pytest 5 | 6 | from a3m.fpr.registry import JSONBackend 7 | from a3m.fpr.registry import Registry 8 | from a3m.fpr.registry import RulePurpose 9 | from a3m.main.models import File 10 | 11 | 12 | def create_file_with_version_id(id: str) -> File: 13 | f = 
File.objects.create(uuid=uuid.uuid4()) 14 | f.fileformatversion_set.create(format_version_id=id) 15 | 16 | return f 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def registry(): 21 | backend = JSONBackend( 22 | files("a3m.fpr.migrations").joinpath("initial-data.json").read_bytes() 23 | ) 24 | return Registry(backend) 25 | 26 | 27 | def test_registry_get_file_rules(db, registry): 28 | """Confirm that it accepts uuid.UUID, str and File.""" 29 | file_obj = create_file_with_version_id( 30 | "082f3282-8331-4da4-b452-632b17e90d66" 31 | ) # fmt/3 32 | assert len(registry.get_file_rules(file_obj.uuid, RulePurpose.THUMBNAIL)) == 1 33 | assert len(registry.get_file_rules(str(file_obj.uuid), RulePurpose.THUMBNAIL)) == 1 34 | assert len(registry.get_file_rules(file_obj, RulePurpose.THUMBNAIL)) == 1 35 | 36 | 37 | def test_registry_integrity(registry): 38 | """Validates the integrity of the registry. 39 | 40 | It depends on implementation details of the JSONBackend. 41 | """ 42 | backend: JSONBackend = registry.backend 43 | 44 | # Verify that all replaced rules are marked as disabled. 45 | for rule in backend.rules.values(): 46 | is_replaced = rule.id in backend.replaced_rules 47 | is_enabled = rule.enabled 48 | assert not ( 49 | is_enabled and is_replaced 50 | ), f"Rule {rule.id} is enabled but has been replaced by Rule {backend.replaced_rules[rule.id]}." 51 | 52 | # Verify that all replaced versions are marked as disabled. 53 | for version in backend.versions.values(): 54 | is_replaced = version.id in backend.replaced_versions 55 | is_enabled = version.enabled 56 | assert not ( 57 | is_enabled and is_replaced 58 | ), f"FormatVersion {version.id} is enabled but has been replaced by FormatVersion {backend.replaced_versions[version.id]}." 59 | 60 | # Verify that all replaced commands are marked as disabled. 
61 | for command in backend.commands.values(): 62 | is_replaced = command.id in backend.replaced_versions 63 | is_enabled = command.enabled 64 | assert not ( 65 | is_enabled and is_replaced 66 | ), f"Command {command.id} is enabled but has been replaced by Command {backend.replaced_versions[command.id]}." 67 | 68 | # Verify that rules in service depend on commands and versions in service. 69 | for rule in backend.rules.values(): 70 | is_replaced = rule.id in backend.replaced_rules 71 | is_enabled = rule.enabled 72 | if is_enabled and not is_replaced: 73 | assert ( 74 | rule.command.enabled 75 | and rule.command.id not in backend.replaced_commands 76 | ), f"Rule in service {rule.id} is using a Command not in service: {rule.command} ({rule.command.tool.description})." 77 | assert ( 78 | rule.format.enabled and rule.format.id not in backend.replaced_versions 79 | ), f"Rule in service {rule.id} is using a FormatVersion not in service: {rule.format} ({rule.format.description})." 80 | -------------------------------------------------------------------------------- /a3m/client/assets/mets/xlink.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /tests/server/test_workflow.py: -------------------------------------------------------------------------------- 1 | import os 2 | from io import StringIO 3 | 4 | import pytest 5 | from django.utils.translation import gettext_lazy 6 | 7 | from a3m.server import translation 8 | from a3m.server import workflow 9 | 10 | ASSETS_DIR = os.path.join( 11 | 
os.path.dirname( 12 | os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))) 13 | ), 14 | "a3m", 15 | "assets", 16 | ) 17 | 18 | FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures") 19 | 20 | 21 | def test_invert_job_statuses(mocker): 22 | mocker.patch( 23 | "a3m.server.jobs.Job.STATUSES", 24 | ( 25 | (1, gettext_lazy("Uno")), 26 | (2, gettext_lazy("Dos")), 27 | (3, gettext_lazy("Tres")), 28 | ), 29 | ) 30 | ret = workflow._invert_job_statuses() 31 | assert ret == {"Uno": 1, "Dos": 2, "Tres": 3} 32 | 33 | 34 | def test_load_invalid_document(): 35 | blob = StringIO("""{}""") 36 | with pytest.raises(workflow.SchemaValidationError): 37 | workflow.load(blob) 38 | 39 | 40 | def test_load_invalid_json(): 41 | blob = StringIO("""{_}""") 42 | with pytest.raises(ValueError): 43 | workflow.load(blob) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "path", 48 | ( 49 | os.path.join(ASSETS_DIR, "workflow.json"), 50 | os.path.join(FIXTURES_DIR, "workflow-integration-test.json"), 51 | ), 52 | ) 53 | def test_load_valid_document(path): 54 | with open(path) as fp: 55 | wf = workflow.load(fp) 56 | 57 | links = wf.get_links() 58 | assert len(links) > 0 59 | first_link = next(iter(links.values())) 60 | assert repr(first_link) == f"Link <{first_link.id}>" 61 | assert isinstance(first_link, workflow.Link) 62 | assert first_link.config == first_link._src["config"] 63 | 64 | # Workflow __str__ method 65 | assert str(wf) == f"Links {len(links)}" 66 | 67 | # Test normalization of job statuses. 68 | link = next(iter(links.values())) 69 | valid_statuses = list(workflow._STATUSES.values()) 70 | assert link["fallback_job_status"] in valid_statuses 71 | for item in link["exit_codes"].values(): 72 | assert item["job_status"] in valid_statuses 73 | 74 | # Test get_label method in LinkBase. 
75 | assert ( 76 | first_link.get_label("description") 77 | == first_link._src["description"][translation.FALLBACK_LANG] 78 | ) 79 | assert first_link.get_label("foobar") is None 80 | 81 | 82 | def test_link_browse_methods(mocker): 83 | with open(os.path.join(ASSETS_DIR, "workflow.json")) as fp: 84 | wf = workflow.load(fp) 85 | ln = wf.get_link("0fd20984-db3c-492b-a512-eedd74bacc82") 86 | assert ln.get_next_link(code="0").id == "82ee9ad2-2c74-4c7c-853e-e4eaf68fc8b6" 87 | assert ln.get_status_id(code="0") == workflow._STATUSES["Completed successfully"] 88 | assert ln.get_next_link(code="1").id == "82ee9ad2-2c74-4c7c-853e-e4eaf68fc8b6" 89 | assert ln.get_status_id(code="1") == workflow._STATUSES["Failed"] 90 | 91 | 92 | def test_get_schema(): 93 | schema = workflow._get_schema() 94 | assert schema["$id"] == "https://a3m.readthedocs.io/workflow/schema/v1.json" 95 | 96 | 97 | def test_get_schema_not_found(mocker): 98 | mocker.patch("a3m.server.workflow._LATEST_SCHEMA", "non-existen-schema") 99 | with pytest.raises(IOError): 100 | workflow._get_schema() 101 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := help 2 | 3 | export COMPOSE_DOCKER_CLI_BUILD=1 4 | export DOCKER_BUILDKIT=1 5 | export BUILDKIT_PROGRESS=plain 6 | 7 | A3M_PIPELINE_DATA ?= $(CURDIR)/hack/compose-volume 8 | 9 | CURRENT_UID := $(shell id -u) 10 | CURRENT_GID := $(shell id -g) 11 | 12 | PYTHON_VERSION = $(shell cat .python-version | awk -F '.' '{print $$1 "." $$2}') 13 | 14 | define compose 15 | docker compose -f compose.yml $(1) 16 | endef 17 | 18 | define compose_run 19 | $(call compose, \ 20 | run \ 21 | --rm \ 22 | --user=$(CURRENT_UID):$(CURRENT_GID) \ 23 | --workdir /a3m \ 24 | --no-deps \ 25 | $(1)) 26 | endef 27 | 28 | .PHONY: shell 29 | shell: ## Open a shell in a new container. 
30 | $(call compose_run, \ 31 | --entrypoint bash \ 32 | a3m) 33 | 34 | .PHONY: build 35 | build: ## Build containers. 36 | $(call compose, \ 37 | build \ 38 | --build-arg USER_ID=$(CURRENT_UID) \ 39 | --build-arg GROUP_ID=$(CURRENT_GID)) 40 | 41 | .PHONY: create-volume 42 | create-volume: ## Create external data volume. 43 | mkdir -p ${A3M_PIPELINE_DATA} 44 | docker volume create \ 45 | --opt type=none \ 46 | --opt o=bind \ 47 | --opt device=$(A3M_PIPELINE_DATA) \ 48 | a3m-pipeline-data 49 | 50 | .PHONY: manage 51 | manage: ## Run Django /manage.py on a3m, suppling [options] as value to ARG, e.g., `make manage ARG=shell` 52 | $(call compose_run, \ 53 | --entrypoint /a3m/manage.py \ 54 | a3m \ 55 | $(ARG)) 56 | 57 | .PHONY: bootstrap 58 | bootstrap: ## Bootstrap a3m (new database). 59 | $(MAKE) manage ARG="migrate --noinput" 60 | 61 | .PHONY: makemigrations 62 | makemigrations: ## Make Django migrations. 63 | $(MAKE) manage ARG="makemigrations main fpr" 64 | 65 | .PHONY: stop 66 | stop: ## Stop services 67 | docker-compose stop a3m 68 | 69 | .PHONY: restart 70 | restart: ## Restart services 71 | docker-compose restart a3m 72 | 73 | .PHONY: db 74 | db: 75 | $(call compose_run, \ 76 | --entrypoint=sqlite3 \ 77 | a3m \ 78 | ./hack/compose-volume/db.sqlite) 79 | 80 | .PHONY: flush 81 | flush: stop flush-db flush-shared-dir bootstrap restart ## Delete ALL user data. 82 | 83 | .PHONY: flush-db 84 | flush-db: ## Flush SQLite database. 85 | $(call compose_run, \ 86 | --entrypoint sh \ 87 | a3m \ 88 | -c "rm -rf /home/a3m/.local/share/a3m/db.sqlite") 89 | 90 | .PHONY: flush-shared-dir 91 | flush-shared-dir: ## Flush shared directory including the database. 
92 | $(call compose_run, \ 93 | --entrypoint sh \ 94 | a3m \ 95 | -c "rm -rf /home/a3m/.local/share/a3m/share/") 96 | 97 | .PHONY: buf 98 | buf: 99 | docker run \ 100 | --volume "$(CURDIR)/proto:/workspace" \ 101 | --workdir /workspace \ 102 | bufbuild/buf:1.42.0 \ 103 | $(ARG) 104 | 105 | .PHONY: help 106 | help: ## Print this help message. 107 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 108 | 109 | 110 | RED := \033[0;31m 111 | GREEN := \033[0;32m 112 | YELLOW := \033[0;33m 113 | RESET := \033[0m 114 | define print_color 115 | @echo "$(1)$(2)$(RESET)" 116 | endef 117 | 118 | .PHONY: workflow 119 | workflow: ## Open amflow application web server. 120 | $(call print_color,$(YELLOW),Access the amflow server at http://127.0.0.1:2323 once it's fully started.) 121 | @docker run --rm --publish=2323:2323 --pull=always \ 122 | --volume=$(CURDIR)/a3m/assets/workflow.json:/tmp/workflow.json \ 123 | artefactual/amflow:latest \ 124 | edit --file=/tmp/workflow.json --verbosity=warn 125 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Dependency management 6 | --------------------- 7 | 8 | Python dependencies 9 | ^^^^^^^^^^^^^^^^^^^ 10 | 11 | The requirements are listed in ``/pyproject.toml``. We use ``uv`` to manage the 12 | project environment, including the ``uv.lock`` lockfile. 
13 | 14 | Create and activate the virtual environment with:: 15 | 16 | $ uv sync --dev 17 | $ source .venv/bin/activate 18 | 19 | Update the lockfile allowing package upgrades:: 20 | 21 | $ uv lock --upgrade 22 | 23 | At this point you can also look up new versions beyond our constraints, e.g.:: 24 | 25 | $ uv run --with=pip pip list --outdated 26 | 27 | The `project lockfile`_ documentation page describes other operations such as 28 | upgrading locked package versions individually. 29 | 30 | pre-commit 31 | ^^^^^^^^^^ 32 | 33 | pre-commit is a framework we use for managing and maintaining pre-commit hooks. 34 | The easiest way to discover and apply new updates is to run:: 35 | 36 | $ pre-commit autoupdate 37 | 38 | Commit the changes and run pre-commit again with:: 39 | 40 | $ pre-commit run --all-files 41 | 42 | Python version 43 | ^^^^^^^^^^^^^^ 44 | 45 | There is a pinned version of Python in ``/.python-version`` that we use when 46 | packaging our Docker image and other development-oriented tools. The preference 47 | is to use the latest version available. Currently: 48 | 49 | .. include:: ../.python-version 50 | :code: 51 | 52 | Releases 53 | -------- 54 | 55 | We aim to further enhance and automate our release process. 56 | 57 | Please adhere to the following instructions: 58 | 59 | 1. Update the changelog (use ``scriv collect`` to populate ``CHANGELOG.rst``). 60 | Submit these changes through a pull request and merge it once all checks have 61 | passed. 62 | 2. Confirm that the checks are also passing in ``main``. 63 | 3. Create and push the git tag, e.g.:: 64 | 65 | $ git tag v0.7.7 66 | $ git push origin refs/tags/v0.7.7 67 | 68 | This should have triggered the publishing workflow. Please confirm that the 69 | new version of the package is available on `PyPI`_ and that the container 70 | image has been published to the `GitHub Container Registry`_.
71 | 72 | Import FPR dataset from Archivematica 73 | ------------------------------------- 74 | 75 | a3m loads the FPR dataset from a JSON document 76 | (``a3m/fpr/migrations/initial-data.json``) generated from the upstream 77 | Archivematica project. This section describes how to generate it: 78 | 79 | In Archivematica, generate a dump with:: 80 | 81 | manage.py dumpdata --format=json fpr 82 | 83 | Remove unused models from the document:: 84 | 85 | jq --sort-keys --indent 4 '[.[] | select(.model == "fpr.format" or .model == "fpr.formatgroup" or .model == "fpr.formatversion" or .model == "fpr.fpcommand" or .model == "fpr.fprule" or .model == "fpr.fptool")]' fpr-dumpdata.json > output.json 86 | 87 | Replace the dataset:: 88 | 89 | mv output.json ../../a3m/fpr/migrations/initial-data.json 90 | 91 | From the root directory, run the registry sanity checks:: 92 | 93 | pytest tests/test_registry.py 94 | 95 | Based on the validation issues reported, fix as needed. Make sure that the 96 | ``fiwalk`` command is not using a ficonfig file. 97 | 98 | 99 | .. _PyPI: https://pypi.org/project/a3m/ 100 | .. _GitHub Container Registry: https://ghcr.io/artefactual-labs/a3m 101 | .. _project lockfile: https://docs.astral.sh/uv/concepts/projects/#project-lockfile 102 | -------------------------------------------------------------------------------- /a3m/client/clientScripts/change_names.py: -------------------------------------------------------------------------------- 1 | # This file is part of Archivematica. 2 | # 3 | # Copyright 2010-2013 Artefactual Systems Inc. 4 | # 5 | # Archivematica is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # Archivematica is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with Archivematica. If not, see . 17 | import os 18 | import re 19 | import shutil 20 | 21 | from unidecode import unidecode 22 | 23 | from a3m.archivematicaFunctions import strToUnicode 24 | 25 | VERSION = "1.10." + "$Id: 8a86512da19d81a86e6219fc21467594e562813b $".split(" ")[1] 26 | 27 | # Letters, digits and a few punctuation characters 28 | ALLOWED_CHARS = re.compile(r"[^a-zA-Z0-9\-_.\(\)]") 29 | REPLACEMENT_CHAR = "_" 30 | 31 | 32 | def change_name(basename): 33 | if basename == "": 34 | raise ValueError("change_name recieved an empty filename.") 35 | unicode_basename = strToUnicode(basename) 36 | unicode_name = unidecode(unicode_basename) 37 | # We can't return an empty string here because it will become the new filename. 38 | # However, in some cases unidecode just strips out all chars (e.g. 39 | # unidecode(u"🚀") == ""), so if that happens, we to replace the invalid chars with 40 | # REPLACEMENT_CHAR. This will result in a filename of one or more underscores, 41 | # which isn't great, but allows processing to continue. 
42 | if unicode_name == "": 43 | unicode_name = unicode_basename 44 | 45 | return ALLOWED_CHARS.sub(REPLACEMENT_CHAR, unicode_name) 46 | 47 | 48 | def change_path(path): 49 | basename = os.path.basename(path) 50 | changed_name = change_name(basename) 51 | 52 | if basename == changed_name: 53 | return path 54 | 55 | dirname = os.path.dirname(path) 56 | 57 | n = 1 58 | file_title, file_extension = os.path.splitext(changed_name) 59 | changed_name = os.path.join(dirname, file_title + file_extension) 60 | 61 | while os.path.exists(changed_name): 62 | changed_name = os.path.join( 63 | dirname, file_title + REPLACEMENT_CHAR + str(n) + file_extension 64 | ) 65 | n += 1 66 | shutil.move(path, changed_name) 67 | 68 | return changed_name 69 | 70 | 71 | def change_tree(start_path, old_start_path): 72 | """ 73 | Recursive generator to change all filesystem entries under the start 74 | path given. 75 | 76 | Yields a tuple of (old_path, changed_path, is_dir, was_changed) once 77 | for each file or dir within the start_path, everything contained in each 78 | dir. 
79 | """ 80 | start_path = os.path.abspath(start_path) 81 | 82 | for dir_entry in os.scandir(start_path): 83 | is_dir = dir_entry.is_dir() # cache is_dir before rename 84 | 85 | changed_name = change_path(dir_entry.path) 86 | changed_path = os.path.join(start_path, changed_name) 87 | old_path = os.path.join(old_start_path, dir_entry.name) 88 | 89 | was_changed = changed_path != old_path 90 | yield old_path, changed_path, is_dir, was_changed 91 | 92 | if is_dir: 93 | yield from change_tree(changed_path, old_path) 94 | -------------------------------------------------------------------------------- /a3m/server/rpc/client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections.abc import Callable 3 | 4 | import tenacity 5 | from grpc import Channel 6 | from grpc import RpcError 7 | 8 | from a3m import __version__ 9 | from a3m.api.transferservice import v1beta1 as transfer_service_api 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | # Default duration in seconds of RPC calls. 15 | _GRPC_DEFAULT_TIMEOUT_SECS = 30 16 | 17 | # Metadata key containing the client version. 
18 | _VERSION_METADATA_KEY = "version" 19 | 20 | 21 | class Client: 22 | """a3m gRPC API client.""" 23 | 24 | def __init__( 25 | self, 26 | channel: Channel, 27 | rpc_timeout: int | None = _GRPC_DEFAULT_TIMEOUT_SECS, 28 | wait_for_ready: bool = False, 29 | ): 30 | self.transfer_stub = transfer_service_api.service_pb2_grpc.TransferServiceStub( 31 | channel 32 | ) 33 | self.rpc_timeout = rpc_timeout 34 | self.wait_for_ready = wait_for_ready 35 | 36 | def _unary_call(self, api_method, request): 37 | rpc_name = request.__class__.__name__.replace("Request", "") 38 | logger.debug("RPC call %s with request: %r", rpc_name, request) 39 | try: 40 | return api_method( 41 | request, 42 | timeout=self.rpc_timeout, 43 | metadata=Client.version_metadata(), 44 | wait_for_ready=self.wait_for_ready, 45 | ) 46 | except RpcError as e: 47 | logger.warning("RPC call %s got error %s", rpc_name, e) 48 | raise 49 | 50 | @staticmethod 51 | def version_metadata(): 52 | return ((_VERSION_METADATA_KEY, __version__),) 53 | 54 | def submit( 55 | self, 56 | url: str, 57 | name: str, 58 | config: transfer_service_api.request_response_pb2.ProcessingConfig = None, 59 | ): 60 | request = transfer_service_api.request_response_pb2.SubmitRequest( 61 | name=name, url=url, config=config 62 | ) 63 | return self._unary_call(self.transfer_stub.Submit, request) 64 | 65 | def read(self, package_id: str): 66 | request = transfer_service_api.request_response_pb2.ReadRequest(id=package_id) 67 | return self._unary_call(self.transfer_stub.Read, request) 68 | 69 | def wait_until_complete( 70 | self, package_id: str, spin_cb: Callable = None 71 | ) -> transfer_service_api.request_response_pb2.ReadResponse: 72 | """Blocks until processing of a package has completed.""" 73 | 74 | def _should_continue( 75 | resp: transfer_service_api.request_response_pb2.ReadResponse, 76 | ): 77 | return ( 78 | resp.status 79 | == transfer_service_api.request_response_pb2.PACKAGE_STATUS_PROCESSING 80 | ) 81 | 82 | def 
_callback(retry_state): 83 | if spin_cb is not None: 84 | spin_cb(retry_state) 85 | 86 | @tenacity.retry( 87 | wait=tenacity.wait_fixed(1), 88 | retry=tenacity.retry_if_result(_should_continue), 89 | after=_callback, 90 | ) 91 | def _poll(): 92 | """Retries while the package is processing.""" 93 | return self.read(package_id) 94 | 95 | return _poll() 96 | 97 | def list_tasks(self, job_id: str): 98 | request = transfer_service_api.request_response_pb2.ListTasksRequest( 99 | job_id=job_id 100 | ) 101 | return self._unary_call(self.transfer_stub.ListTasks, request) 102 | --------------------------------------------------------------------------------