├── .git_archival.txt ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── ci-docker.yml │ ├── copilot-setup-steps.yml │ ├── machines.yml │ ├── mirror_gitee.yml │ ├── publish_conda.yml │ ├── pyright.yml │ ├── release.yml │ ├── test-bohrium.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── AGENTS.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── ci ├── LICENSE ├── README.md ├── pbs.sh ├── pbs │ ├── docker-compose.yml │ └── start-pbs.sh ├── slurm.sh ├── slurm │ ├── docker-compose.yml │ ├── register_cluster.sh │ └── start-slurm.sh ├── ssh.sh ├── ssh │ ├── docker-compose.yml │ └── start-ssh.sh └── ssh_rsync.sh ├── codecov.yml ├── conda ├── conda_build_config.yaml └── meta.yaml ├── doc ├── .gitignore ├── Makefile ├── batch.md ├── cli.rst ├── conf.py ├── context.md ├── credits.rst ├── dpdispatcher_on_yarn.md ├── env.md ├── examples │ ├── expanse.md │ ├── g16.md │ ├── shell.md │ └── template.md ├── getting-started.md ├── index.rst ├── install.md ├── machine.rst ├── make.bat ├── pep723.rst ├── requirements.txt ├── resources.rst ├── run.md └── task.rst ├── dpdispatcher ├── __init__.py ├── __main__.py ├── arginfo.py ├── base_context.py ├── contexts │ ├── __init__.py │ ├── dp_cloud_server_context.py │ ├── hdfs_context.py │ ├── lazy_local_context.py │ ├── local_context.py │ ├── openapi_context.py │ └── ssh_context.py ├── dlog.py ├── dpcloudserver │ ├── __init__.py │ └── client.py ├── dpdisp.py ├── entrypoints │ ├── __init__.py │ ├── gui.py │ ├── run.py │ └── submission.py ├── machine.py ├── machines │ ├── JH_UniScheduler.py │ ├── __init__.py │ ├── distributed_shell.py │ ├── dp_cloud_server.py │ ├── fugaku.py │ ├── lsf.py │ ├── openapi.py │ ├── pbs.py │ ├── shell.py │ └── slurm.py ├── run.py ├── submission.py └── utils │ ├── __init__.py │ ├── dpcloudserver │ ├── __init__.py │ ├── client.py │ ├── config.py │ ├── retcode.py │ └── zip_file.py │ ├── hdfs_cli.py │ ├── job_status.py │ ├── record.py │ └── utils.py ├── examples ├── dpdisp_run.py ├── machine │ ├── expanse.json │ ├── lazy_local.json │ ├── mandu.json │ └── ssh_proxy_command.json ├── resources │ ├── expanse_cpu.json │ ├── mandu.json │ ├── template.slurm │ └── tiger.json └── task │ ├── deepmd-kit.json │ └── g16.json ├── pyproject.toml ├── scripts ├── script_gen_dargs_docs.py └── script_gen_dargs_json.py └── tests ├── .gitignore ├── __init__.py ├── batch.json ├── context.py ├── debug_test_class_submission_init.py ├── devel_test_JH_UniScheduler.py ├── devel_test_ali_ehpc.py ├── devel_test_dp_cloud_server.py ├── devel_test_lazy_ali_ehpc.py ├── devel_test_lsf.py ├── devel_test_shell.py ├── devel_test_slurm.py ├── devel_test_ssh_ali_ehpc.py ├── graph.pb ├── hello_world.py ├── jsons ├── job.json ├── machine.json ├── machine_JH_UniScheduler.json ├── machine_ali_ehpc.json ├── machine_center.json ├── machine_diffenert.json ├── machine_dp_cloud_server.json ├── machine_fugaku.json ├── machine_if_cuda_multi_devices.json ├── machine_lazy_local_jh_unischeduler.json ├── machine_lazy_local_lsf.json ├── machine_lazy_local_slurm.json ├── machine_lazylocal_shell.json ├── machine_local_fugaku.json ├── machine_local_shell.json ├── machine_lsf.json ├── machine_openapi.json ├── machine_slurm.json ├── machine_yarn.json ├── resources.json ├── submission.json └── task.json ├── sample_class.py ├── script_gen_json.py ├── slurm_test.env ├── test_JH_UniScheduler_script_generation.py ├── test_argcheck.py ├── test_class_job.py ├── test_class_machine.py ├── test_class_machine_dispatch.py ├── 
test_class_resources.py ├── test_class_submission.py ├── test_class_submission_init.py ├── test_class_task.py ├── test_cli.py ├── test_context_dir └── 0_md │ ├── bct-1 │ ├── conf.lmp │ ├── input.lammps │ └── some_dir │ │ └── some_file │ ├── bct-2 │ ├── conf.lmp │ └── input.lammps │ ├── bct-3 │ ├── conf.lmp │ └── input.lammps │ ├── bct-4 │ ├── conf.lmp │ └── input.lammps │ ├── dir with space │ └── file with space │ ├── graph.pb │ └── some_dir │ └── some_file ├── test_examples.py ├── test_group_size.py ├── test_gui.py ├── test_hdfs_context.py ├── test_hdfs_dir └── 0_md │ ├── bct-1 │ ├── conf.lmp │ └── input.lammps │ ├── bct-2 │ ├── conf.lmp │ └── input.lammps │ ├── bct-3 │ ├── conf.lmp │ └── input.lammps │ ├── bct-4 │ ├── conf.lmp │ └── input.lammps │ └── graph.pb ├── test_if_cuda_multi_devices └── test_dir │ └── test.txt ├── test_import_classes.py ├── test_jh_unischeduler └── 0_md │ ├── bct-1 │ ├── conf.lmp │ └── input.lammps │ ├── bct-2 │ ├── conf.lmp │ └── input.lammps │ ├── bct-3 │ ├── conf.lmp │ └── input.lammps │ ├── bct-4 │ ├── conf.lmp │ └── input.lammps │ └── graph.pb ├── test_lazy_local_context.py ├── test_local_context.py ├── test_lsf_dir └── 0_md │ ├── bct-1 │ ├── conf.lmp │ └── input.lammps │ ├── bct-2 │ ├── conf.lmp │ └── input.lammps │ ├── bct-3 │ ├── conf.lmp │ └── input.lammps │ ├── bct-4 │ ├── conf.lmp │ └── input.lammps │ ├── graph.pb │ └── submission.json ├── test_lsf_script_generation.py ├── test_pbs_dir └── 0_md │ ├── bct-1 │ ├── conf.lmp │ └── input.lammps │ ├── bct-2 │ ├── conf.lmp │ └── input.lammps │ ├── bct-3 │ ├── conf.lmp │ └── input.lammps │ ├── bct-4 │ ├── conf.lmp │ └── input.lammps │ └── graph.pb ├── test_retry.py ├── test_rsync_flags.py ├── test_rsync_proxy.py ├── test_run.py ├── test_run_submission.py ├── test_run_submission_bohrium.py ├── test_run_submission_ratio_unfinished.py ├── test_shell_cuda_multi_devices.py ├── test_shell_trival.py ├── test_shell_trival_dir ├── fail_dir │ └── mock_fail_task.txt ├── parent_dir │ ├── dir with space │ │ └── example.txt │ ├── dir1 │ │ └── example.txt │ ├── dir2 │ │ └── example.txt │ ├── dir3 │ │ └── example.txt │ ├── dir4 │ │ └── example.txt │ └── graph.pb └── recover_dir │ └── mock_recover_task.txt ├── test_slurm_dir └── 0_md │ ├── bct-1 │ ├── conf.lmp │ └── input.lammps │ ├── bct-2 │ ├── conf.lmp │ └── input.lammps │ ├── bct-3 │ ├── conf.lmp │ └── input.lammps │ ├── bct-4 │ ├── conf.lmp │ └── input.lammps │ ├── d3c842c5b9476e48f7145b370cd330372b9293e1.json │ ├── graph.pb │ └── submission.json ├── test_slurm_script_generation.py ├── test_ssh_context.py ├── test_ssh_jump_host.py └── test_work_path └── .gitkeep /.git_archival.txt: -------------------------------------------------------------------------------- 1 | node: 4816095c9e711259877fb90023ce74ce527ba5c3 2 | node-date: 2025-10-13T18:04:52+08:00 3 | describe-name: v0.6.12-1-g4816095c 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .git_archival.txt export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/ci-docker.yml: 
-------------------------------------------------------------------------------- 1 | name: Build docker image and push to Docker Hub 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | build-n-push: 10 | name: Build docker image and push to Docker Hub 11 | if: github.repository == 'deepmodeling/dpdispatcher' 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out the repo 15 | uses: actions/checkout@v5 16 | 17 | - name: Log in to Docker Hub 18 | uses: docker/login-action@v3 19 | with: 20 | username: ${{ secrets.DOCKER_USERNAME }} 21 | password: ${{ secrets.DOCKER_PASSWORD }} 22 | 23 | - name: Set up QEMU 24 | uses: docker/setup-qemu-action@v3 25 | 26 | - name: Set up Docker Buildx 27 | id: buildx 28 | uses: docker/setup-buildx-action@v3 29 | 30 | - name: Build and push 31 | run: | 32 | docker buildx build --platform linux/arm64,linux/amd64 -t dptechnology/dpdispatcher:${{ github.ref_name }} -t dptechnology/dpdispatcher:latest --push . 33 | -------------------------------------------------------------------------------- /.github/workflows/copilot-setup-steps.yml: -------------------------------------------------------------------------------- 1 | name: "Copilot Setup Steps" 2 | 3 | # Automatically run the setup steps when they are changed to allow for easy validation, and 4 | # allow manual testing through the repository's "Actions" tab 5 | on: 6 | workflow_dispatch: 7 | push: 8 | paths: 9 | - .github/workflows/copilot-setup-steps.yml 10 | pull_request: 11 | paths: 12 | - .github/workflows/copilot-setup-steps.yml 13 | 14 | jobs: 15 | # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. 16 | copilot-setup-steps: 17 | runs-on: ubuntu-latest 18 | 19 | # Set the permissions to the lowest permissions possible needed for your steps. 20 | # Copilot will be given its own token for its operations. 21 | permissions: 22 | # If you want to clone the repository as part of your setup steps, for example to install dependencies, you'll need the `contents: read` permission. If you don't clone the repository in your setup steps, Copilot will do this for you automatically after the steps complete. 23 | contents: read 24 | 25 | # You can define any steps you want, and they will run before the agent starts. 26 | # If you do not check out your code, Copilot will do this for you. 
27 | steps: 28 | - name: Checkout code 29 | uses: actions/checkout@v5 30 | 31 | - name: Set up Python 32 | uses: actions/setup-python@v6 33 | with: 34 | python-version: "3.11" 35 | 36 | - name: Install uv 37 | run: | 38 | # Install uv using pip to avoid network restrictions 39 | python -m pip install --upgrade pip 40 | python -m pip install uv 41 | 42 | - name: Create virtual environment and install dependencies 43 | run: | 44 | uv venv .venv 45 | source .venv/bin/activate 46 | uv pip install .[test] coverage 47 | 48 | - name: Install development tools 49 | run: | 50 | uv tool install pre-commit 51 | uv tool install pyright 52 | 53 | - name: Set up pre-commit hooks 54 | run: | 55 | source .venv/bin/activate 56 | pre-commit install --install-hooks 57 | 58 | - name: Verify installation 59 | run: | 60 | source .venv/bin/activate 61 | python --version 62 | uv --version 63 | pre-commit --version 64 | pyright --version 65 | python -c "import dpdispatcher; print('DPDispatcher installed successfully')" 66 | -------------------------------------------------------------------------------- /.github/workflows/machines.yml: -------------------------------------------------------------------------------- 1 | name: Test on different machines 2 | 3 | 'on': 4 | push: 5 | branches-ignore: 6 | - 'copilot/**' 7 | - 'dependabot/**' 8 | - 'pre-commit-ci-update-config' 9 | pull_request: 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | machine: 17 | - slurm 18 | - pbs 19 | - ssh 20 | - ssh_rsync 21 | steps: 22 | - uses: actions/checkout@v5 23 | - run: ./ci/${{ matrix.machine }}.sh 24 | env: 25 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 26 | -------------------------------------------------------------------------------- /.github/workflows/mirror_gitee.yml: -------------------------------------------------------------------------------- 1 | name: Mirror to Gitee Repo 2 | 3 | on: [ push, delete, create ] 4 | 5 | # Ensures that only one mirror task will run at a time. 
6 | concurrency: 7 | group: git-mirror 8 | 9 | jobs: 10 | git-mirror: 11 | uses: deepmodeling/workflows/.github/workflows/mirror_gitee.yml@main 12 | secrets: 13 | SYNC_GITEE_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }} 14 | -------------------------------------------------------------------------------- /.github/workflows/publish_conda.yml: -------------------------------------------------------------------------------- 1 | name: publish_conda 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v5 13 | - name: publish-to-conda 14 | uses: felix5572/conda-publish-action@v1.9 15 | with: 16 | subdir: 'conda' 17 | anacondatoken: ${{ secrets.ANACONDA_TOKEN }} 18 | platforms: 'noarch' 19 | -------------------------------------------------------------------------------- /.github/workflows/pyright.yml: -------------------------------------------------------------------------------- 1 | 'on': 2 | push: 3 | branches-ignore: 4 | - 'copilot/**' 5 | - 'dependabot/**' 6 | - 'pre-commit-ci-update-config' 7 | pull_request: 8 | name: Type checker 9 | jobs: 10 | test: 11 | name: pyright 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v5 15 | - uses: actions/setup-python@v6 16 | with: 17 | python-version: '3.11' 18 | - run: pip install uv 19 | - run: uv pip install --system -e .[cloudserver,gui] 20 | - uses: jakebailey/pyright-action@v2 21 | with: 22 | version: 1.1.404 23 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | 'on': 2 | push: 3 | branches-ignore: 4 | - 'copilot/**' 5 | - 'dependabot/**' 6 | - 'pre-commit-ci-update-config' 7 | tags: 8 | - 'v*' 9 | pull_request: 10 | name: Release to pypi 11 | jobs: 12 | release-to-pypi: 13 | name: Release to pypi 14 | runs-on: ubuntu-latest 15 | permissions: 16 | # IMPORTANT: this permission is mandatory for trusted publishing 17 | id-token: write 18 | steps: 19 | - uses: actions/checkout@v5 20 | - name: Setup python 21 | uses: actions/setup-python@v6 22 | with: 23 | python-version: 3.x 24 | architecture: x64 25 | - name: Install dependencies 26 | run: python -m pip install build 27 | - run: python -m build 28 | - name: Publish a Python distribution to PyPI 29 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 30 | uses: pypa/gh-action-pypi-publish@release/v1 31 | with: 32 | verbose: true 33 | -------------------------------------------------------------------------------- /.github/workflows/test-bohrium.yml: -------------------------------------------------------------------------------- 1 | name: Test Bohrium 2 | 3 | 'on': 4 | push: 5 | branches-ignore: 6 | - 'copilot/**' 7 | - 'dependabot/**' 8 | - 'pre-commit-ci-update-config' 9 | pull_request_target: 10 | types: 11 | - "labeled" 12 | 13 | jobs: 14 | test: 15 | runs-on: ubuntu-latest 16 | environment: bohrium 17 | if: github.repository_owner == 'deepmodeling' && (github.event.label.name == 'Test Bohrium' || github.event_name == 'push') 18 | steps: 19 | - uses: actions/checkout@v5 20 | with: 21 | ref: "${{ github.event.pull_request.merge_commit_sha }}" 22 | - name: Set up Python 3.12 23 | uses: actions/setup-python@v6 24 | with: 25 | python-version: '3.12' 26 | cache: 'pip' 27 | - run: pip install uv 28 | - run: uv pip install --system .[bohrium] coverage 29 | - name: Test 30 | run: coverage run --source=./dpdispatcher -m unittest 
-v tests/test_run_submission_bohrium.py && coverage report 31 | env: 32 | DPDISPATCHER_TEST: bohrium 33 | BOHRIUM_EMAIL: ${{ secrets.BOHRIUM_EMAIL }} 34 | BOHRIUM_PASSWORD: ${{ secrets.BOHRIUM_PASSWORD }} 35 | BOHRIUM_PROJECT_ID: ${{ secrets.BOHRIUM_PROJECT_ID }} 36 | BOHRIUM_ACCESS_KEY: ${{ secrets.BOHRIUM_ACCESS_KEY }} 37 | - uses: codecov/codecov-action@v5 38 | env: 39 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 40 | remove_label: 41 | permissions: 42 | contents: read 43 | pull-requests: write 44 | # so one can re-trigger the workflow without manually removing the label 45 | runs-on: ubuntu-latest 46 | if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test Bohrium' 47 | steps: 48 | - uses: actions-ecosystem/action-remove-labels@v1 49 | with: 50 | labels: Test Bohrium 51 | number: ${{ github.event.pull_request.number }} 52 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | 'on': 4 | push: 5 | branches-ignore: 6 | - 'copilot/**' 7 | - 'dependabot/**' 8 | - 'pre-commit-ci-update-config' 9 | pull_request: 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.platform }} 14 | strategy: 15 | matrix: 16 | python-version: 17 | - 3.7 18 | - 3.9 19 | - '3.10' 20 | - '3.11' 21 | - '3.12' 22 | platform: 23 | - ubuntu-22.04 24 | - macos-latest 25 | - windows-latest 26 | exclude: # Apple Silicon ARM64 does not support Python < v3.8 27 | - python-version: "3.7" 28 | platform: macos-latest 29 | include: # So run those legacy versions on Intel CPUs 30 | - python-version: "3.7" 31 | platform: macos-15-intel 32 | steps: 33 | - uses: actions/checkout@v5 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v6 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - uses: astral-sh/setup-uv@v7 39 | with: 40 | enable-cache: true 41 | cache-dependency-glob: | 42 | **/requirements*.txt 43 | **/pyproject.toml 44 | - run: uv pip install --system .[test] coverage 45 | - name: Test 46 | run: | 47 | python -m coverage run -p --source=./dpdispatcher -m unittest -v 48 | python -m coverage run -p --source=./dpdispatcher -m dpdispatcher -h 49 | python -m coverage combine 50 | python -m coverage report 51 | - uses: codecov/codecov-action@v5 52 | env: 53 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 54 | pass: 55 | needs: [test] 56 | runs-on: ubuntu-latest 57 | if: always() 58 | steps: 59 | - name: Decide whether the needed jobs succeeded or failed 60 | uses: re-actors/alls-green@release/v1 61 | with: 62 | jobs: ${{ toJSON(needs) }} 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | POTCAR 2 | # *.pb 3 | tests/graph_real.pb 4 | *~ 5 | *.d 6 | *.o 7 | *.aux 8 | *.dvi 9 | *.pdf 10 | *.so 11 | *.bin 12 | *.intbin 13 | *.meta 14 | *.log 15 | *.bz2 16 | *.pyc 17 | \#* 18 | iter.* 19 | 20 | # out.txt 21 | topol.tpr 22 | mdout.mdp 23 | traj*xtc 24 | traj.trr 25 | ener.edr 26 | state*cpt 27 | CMakeCache.txt 28 | CMakeFiles 29 | log.lammps 30 | restart.* 31 | dump.* 32 | *.out 33 | build 34 | dist 35 | pydispatcher.egg-info 36 | */*.pyc 37 | */__pycache__ 38 | *.swp 39 | .eggs 40 | .coverage* 41 | dbconfig.json 42 | .vscode/* 43 | .idea 44 | */_version.py 45 | */_date.py 46 | *.egg 47 | *.egg-info 48 | venv/* 49 | node_modules/ 50 | # Test artifacts 51 | 
*_flag_if_job_task_fail 52 | *_job_id 53 | *_job_tag_finished 54 | *_task_tag_finished 55 | *.sub 56 | *.sub.run 57 | script_*.py 58 | # Job execution temporary files 59 | err 60 | log 61 | # JSON files with hash names (job state files) 62 | [0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]*.json 63 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v6.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | exclude: "^tests/" 9 | - id: end-of-file-fixer 10 | exclude: "^tests/" 11 | - id: check-yaml 12 | exclude: "^conda/" 13 | - id: check-json 14 | - id: check-added-large-files 15 | - id: check-merge-conflict 16 | - id: check-symlinks 17 | - id: check-toml 18 | # Python 19 | - repo: https://github.com/astral-sh/ruff-pre-commit 20 | # Ruff version. 21 | rev: v0.12.8 22 | hooks: 23 | - id: ruff 24 | args: ["--fix"] 25 | - id: ruff-format 26 | # numpydoc 27 | - repo: https://github.com/Carreau/velin 28 | rev: 0.0.12 29 | hooks: 30 | - id: velin 31 | args: ["--write"] 32 | # Python inside docs 33 | - repo: https://github.com/asottile/blacken-docs 34 | rev: 1.19.1 35 | hooks: 36 | - id: blacken-docs 37 | # markdown, yaml 38 | - repo: https://github.com/pre-commit/mirrors-prettier 39 | rev: v4.0.0-alpha.8 40 | hooks: 41 | - id: prettier 42 | types_or: [markdown, yaml] 43 | # workflow files cannot be modified by pre-commit.ci 44 | exclude: ^(\.github/workflows|conda) 45 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.10" 13 | jobs: 14 | post_create_environment: 15 | - pip install uv 16 | post_install: 17 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH uv pip install .[docs] 18 | # Build documentation in the docs/ directory with Sphinx 19 | sphinx: 20 | configuration: doc/conf.py 21 | 22 | # If using Sphinx, optionally build your docs in additional formats such as PDF 23 | formats: all 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## How to contribute 2 | 3 | DPDispatcher welcomes everyone (and every organization) to use it under the LGPL-3.0 License. 4 | 5 | Contributions are welcome and greatly appreciated! Every little bit helps, and credit will always be given. 6 | 7 | If you want to contribute to dpdispatcher, just open an issue, submit a pull request, leave a comment on GitHub Discussions, or contact the DeepModeling team. 8 | 9 | Any form of improvement is welcome:
10 | 11 | - use, star, or fork dpdispatcher 12 | - improve the documentation 13 | - report or fix bugs 14 | - request, discuss, or implement features 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12 AS compile-image 2 | 3 | RUN python -m venv /opt/venv 4 | # Make sure we use the virtualenv 5 | ENV PATH="/opt/venv/bin:$PATH" 6 | 7 | WORKDIR /data/dpdispatcher 8 | COPY ./ ./ 9 | RUN pip install .[bohrium] 10 | 11 | FROM python:3.12 AS build-image 12 | COPY --from=compile-image /opt/venv /opt/venv 13 | ENV PATH="/opt/venv/bin:$PATH" 14 | CMD ["/bin/bash"] 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DPDispatcher 2 | 3 | [![conda-forge](https://img.shields.io/conda/dn/conda-forge/dpdispatcher?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/dpdispatcher) 4 | [![pip install](https://img.shields.io/pypi/dm/dpdispatcher?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdispatcher) 5 | [![docker pull](https://img.shields.io/docker/pulls/dptechnology/dpdispatcher?logo=docker)](https://hub.docker.com/r/dptechnology/dpdispatcher) 6 | [![Documentation Status](https://readthedocs.org/projects/dpdispatcher/badge/)](https://dpdispatcher.readthedocs.io/) 7 | 8 | DPDispatcher is a Python package used to generate job input scripts for HPC (High-Performance Computing) scheduler systems (Slurm/PBS/LSF/Bohrium), submit them to HPC systems, and poke them until they finish. 9 | 10 | DPDispatcher will monitor (poke) until these jobs finish and download the result files (if these jobs are running on remote systems connected by SSH). 11 | 12 | For more information, check the [documentation](https://dpdispatcher.readthedocs.io/). 13 | 14 | ## Installation 15 | 16 | DPDispatcher can be installed by `pip`: 17 | 18 | ```bash 19 | pip install dpdispatcher 20 | ``` 21 | 22 | To add [Bohrium](https://bohrium.dp.tech/) support, execute 23 | 24 | ```bash 25 | pip install dpdispatcher[bohrium] 26 | ``` 27 | 28 | ## Usage 29 | 30 | See [Getting Started](https://dpdispatcher.readthedocs.io/en/latest/getting-started.html) for usage. 31 | 32 | ## Contributing 33 | 34 | DPDispatcher is maintained by DeepModeling's developers and welcomes contributions from everyone. 35 | See the [Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓 36 | 37 | ## References 38 | 39 | DPDispatcher is derived from the [DP-GEN](https://github.com/deepmodeling/dpgen) package. To mention DPDispatcher in a scholarly publication, please read Section 3.3 in the [DP-GEN paper](https://doi.org/10.1016/j.cpc.2020.107206). 40 | -------------------------------------------------------------------------------- /ci/LICENSE: -------------------------------------------------------------------------------- 1 | The files about slurm and pbs in this directory are originally under the following license: 2 | 3 | Copyright (c) 2018, Anaconda, Inc. and contributors 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, 7 | are permitted provided that the following conditions are met: 8 | 9 | Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer.
11 | 12 | Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | Neither the name of Anaconda nor the names of any contributors may be used to 17 | endorse or promote products derived from this software without specific prior 18 | written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 30 | THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /ci/README.md: -------------------------------------------------------------------------------- 1 | ## Notes 2 | 3 | Files about `slurm` and `pbs` in this directory is originally taken from [dask/dask-jobqueue](https://github.com/dask/dask-jobqueue) under [BSD 3-Clause "New" or "Revised" License](LICENSE). 4 | They have been relicensed under [LPGL 3.0](../LICENSE) as [they are compatible](https://www.gnu.org/licenses/license-list.html#ModifiedBSD). 5 | -------------------------------------------------------------------------------- /ci/pbs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | cd ./ci/pbs 5 | docker compose pull 6 | ./start-pbs.sh 7 | cd - 8 | 9 | docker exec pbs_master /bin/bash -c "chmod -R 777 /shared_space" 10 | docker exec pbs_master /bin/bash -c "chown -R pbsuser:pbsuser /home/pbsuser" 11 | 12 | docker exec pbs_master /bin/bash -c "yum install -y procps" 13 | docker exec pbs_master /bin/bash -c "cd /dpdispatcher && pip install uv && uv pip install --system .[test] coverage && chown -R pbsuser ." 14 | docker exec -u pbsuser pbs_master /bin/bash -c "cd /dpdispatcher && coverage run --source=./dpdispatcher -m unittest -v && coverage report" 15 | docker exec -u pbsuser --env-file <(env | grep GITHUB) pbs_master /bin/bash -c "cd /dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov" 16 | -------------------------------------------------------------------------------- /ci/pbs/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | master: 5 | image: daskdev/dask-jobqueue:pbs 6 | build: . 7 | container_name: pbs_master 8 | hostname: pbs_master 9 | environment: 10 | - CI_SHARED_SPACE=/shared_space 11 | - DPDISPATCHER_TEST=pbs 12 | volumes: 13 | - ../..:/dpdispatcher 14 | - userhome:/home/pbsuser 15 | - shared_space:/shared_space 16 | command: bash /run-master.sh 17 | 18 | slave_one: 19 | image: daskdev/dask-jobqueue:pbs 20 | build: . 
21 | container_name: pbs_slave_1 22 | hostname: pbs_slave_1 23 | volumes: 24 | - userhome:/home/pbsuser 25 | - shared_space:/shared_space 26 | entrypoint: "bash /slave-entrypoint.sh" 27 | command: bash /run-slave.sh 28 | links: 29 | - "master:pbs_master" 30 | environment: 31 | - PBS_MASTER=pbs_master 32 | depends_on: 33 | - master 34 | 35 | slave_two: 36 | image: daskdev/dask-jobqueue:pbs 37 | build: . 38 | container_name: pbs_slave_2 39 | hostname: pbs_slave_2 40 | volumes: 41 | - userhome:/home/pbsuser 42 | - shared_space:/shared_space 43 | entrypoint: "bash /slave-entrypoint.sh" 44 | command: bash /run-slave.sh 45 | links: 46 | - "master:pbs_master" 47 | environment: 48 | - PBS_MASTER=pbs_master 49 | depends_on: 50 | - master 51 | 52 | volumes: 53 | userhome: 54 | shared_space: 55 | -------------------------------------------------------------------------------- /ci/pbs/start-pbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker compose up -d --no-build 4 | while [ `docker exec -u pbsuser pbs_master pbsnodes -a | grep "Mom = pbs_slave" | wc -l` -ne 2 ] 5 | do 6 | echo "Waiting for PBS slave nodes to become available"; 7 | sleep 2 8 | done 9 | echo "PBS properly configured" 10 | -------------------------------------------------------------------------------- /ci/slurm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | cd ./ci/slurm 5 | docker compose pull 6 | ./start-slurm.sh 7 | cd - 8 | 9 | docker exec slurmctld /bin/bash -c "yum install -y procps" 10 | docker exec slurmctld /bin/bash -c "cd dpdispatcher && pip install uv && uv pip install --system .[test] coverage && coverage run --source=./dpdispatcher -m unittest -v && coverage report" 11 | docker exec --env-file <(env | grep -e GITHUB -e CODECOV) slurmctld /bin/bash -c "cd dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov" 12 | -------------------------------------------------------------------------------- /ci/slurm/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2.2" 2 | 3 | services: 4 | mysql: 5 | image: mysql:5.7.29 6 | hostname: mysql 7 | container_name: mysql 8 | environment: 9 | MYSQL_RANDOM_ROOT_PASSWORD: "yes" 10 | MYSQL_DATABASE: slurm_acct_db 11 | MYSQL_USER: slurm 12 | MYSQL_PASSWORD: password 13 | volumes: 14 | - var_lib_mysql:/var/lib/mysql 15 | networks: 16 | common-network: 17 | 18 | slurmdbd: 19 | image: daskdev/dask-jobqueue:slurm 20 | build: . 21 | command: ["slurmdbd"] 22 | container_name: slurmdbd 23 | hostname: slurmdbd 24 | volumes: 25 | - etc_munge:/etc/munge 26 | - etc_slurm:/etc/slurm 27 | - var_log_slurm:/var/log/slurm 28 | expose: 29 | - "6819" 30 | depends_on: 31 | - mysql 32 | networks: 33 | common-network: 34 | 35 | slurmctld: 36 | image: daskdev/dask-jobqueue:slurm 37 | build: . 38 | command: ["slurmctld"] 39 | container_name: slurmctld 40 | hostname: slurmctld 41 | environment: 42 | CI_SHARED_SPACE: /data 43 | DPDISPATCHER_TEST: slurm 44 | volumes: 45 | - etc_munge:/etc/munge 46 | - etc_slurm:/etc/slurm 47 | - slurm_jobdir:/data 48 | - var_log_slurm:/var/log/slurm 49 | - ../..:/dpdispatcher 50 | expose: 51 | - "6817" 52 | depends_on: 53 | - "slurmdbd" 54 | networks: 55 | common-network: 56 | ipv4_address: 10.1.1.10 57 | cap_add: 58 | - NET_ADMIN 59 | 60 | c1: 61 | image: daskdev/dask-jobqueue:slurm 62 | build: . 
63 | command: ["slurmd"] 64 | hostname: c1 65 | container_name: c1 66 | volumes: 67 | - etc_munge:/etc/munge 68 | - etc_slurm:/etc/slurm 69 | - slurm_jobdir:/data 70 | - var_log_slurm:/var/log/slurm 71 | expose: 72 | - "6818" 73 | depends_on: 74 | - "slurmctld" 75 | networks: 76 | common-network: 77 | ipv4_address: 10.1.1.11 78 | cap_add: 79 | - NET_ADMIN 80 | 81 | c2: 82 | image: daskdev/dask-jobqueue:slurm 83 | build: . 84 | command: ["slurmd"] 85 | hostname: c2 86 | container_name: c2 87 | volumes: 88 | - etc_munge:/etc/munge 89 | - etc_slurm:/etc/slurm 90 | - slurm_jobdir:/data 91 | - var_log_slurm:/var/log/slurm 92 | expose: 93 | - "6818" 94 | depends_on: 95 | - "slurmctld" 96 | networks: 97 | common-network: 98 | ipv4_address: 10.1.1.12 99 | cap_add: 100 | - NET_ADMIN 101 | 102 | volumes: 103 | etc_munge: 104 | etc_slurm: 105 | slurm_jobdir: 106 | var_lib_mysql: 107 | var_log_slurm: 108 | 109 | networks: 110 | common-network: 111 | driver: bridge 112 | ipam: 113 | driver: default 114 | config: 115 | - subnet: 10.1.1.0/24 116 | -------------------------------------------------------------------------------- /ci/slurm/register_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | docker exec slurmctld bash -c "/usr/bin/sacctmgr --immediate add cluster name=linux" && \ 5 | docker compose restart slurmdbd slurmctld 6 | -------------------------------------------------------------------------------- /ci/slurm/start-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker compose up -d --no-build 4 | 5 | while [ `./register_cluster.sh 2>&1 | grep "sacctmgr: error" | wc -l` -ne 0 ] 6 | do 7 | echo "Waiting for SLURM cluster to become ready"; 8 | sleep 2 9 | done 10 | echo "SLURM properly configured" 11 | 12 | # On some clusters the login node does not have the same interface as the 13 | # compute nodes. The next three lines allow to test this edge case by adding 14 | # separate interfaces on the worker and on the scheduler nodes. 15 | docker exec slurmctld ip addr add 10.1.1.20/24 dev eth0 label eth0:scheduler 16 | docker exec c1 ip addr add 10.1.1.21/24 dev eth0 label eth0:worker 17 | docker exec c2 ip addr add 10.1.1.22/24 dev eth0 label eth0:worker 18 | -------------------------------------------------------------------------------- /ci/ssh.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | cd ./ci/ssh 5 | docker compose pull 6 | ./start-ssh.sh 7 | cd - 8 | 9 | docker exec test /bin/bash -c "cd /dpdispatcher && pip install uv && uv pip install --system .[test] coverage && coverage run --source=./dpdispatcher -m unittest -v && coverage report" 10 | docker exec --env-file <(env | grep -e GITHUB -e CODECOV) test /bin/bash -c "cd /dpdispatcher && curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov" 11 | -------------------------------------------------------------------------------- /ci/ssh/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2.2" 2 | 3 | services: 4 | server: 5 | image: takeyamajp/ubuntu-sshd:ubuntu22.04 6 | build: . 7 | container_name: server 8 | hostname: server 9 | environment: 10 | ROOT_PASSWORD: dpdispatcher 11 | expose: 12 | - "22" 13 | volumes: 14 | - ssh_config:/root/.ssh 15 | jumphost: 16 | image: takeyamajp/ubuntu-sshd:ubuntu22.04 17 | build: . 
18 | container_name: jumphost 19 | hostname: jumphost 20 | environment: 21 | ROOT_PASSWORD: dpdispatcher 22 | expose: 23 | - "22" 24 | volumes: 25 | - ssh_config:/root/.ssh 26 | test: 27 | image: python:3.10 28 | tty: true 29 | build: . 30 | container_name: test 31 | hostname: test 32 | environment: 33 | DPDISPATCHER_TEST: ssh 34 | volumes: 35 | - ssh_config:/root/.ssh 36 | - ../..:/dpdispatcher 37 | depends_on: 38 | - server 39 | - jumphost 40 | 41 | volumes: 42 | ssh_config: 43 | -------------------------------------------------------------------------------- /ci/ssh/start-ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker compose up -d --no-build 4 | 5 | # Set up SSH keys on server 6 | docker exec server /bin/bash -c "ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N \"\" && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" 7 | docker exec server /bin/bash -c "mkdir -p /dpdispatcher_working" 8 | docker exec server /bin/bash -c "mkdir -p /tmp/rsync_test" 9 | 10 | # Set up SSH keys on jumphost and configure it to access server 11 | docker exec jumphost /bin/bash -c "ssh-keygen -b 2048 -t rsa -f /root/.ssh/id_rsa -q -N \"\" && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" 12 | 13 | # Copy keys between containers to enable jump host functionality 14 | # Get the public key from jumphost and add it to server's authorized_keys 15 | docker exec jumphost /bin/bash -c "cat /root/.ssh/id_rsa.pub" | docker exec -i server /bin/bash -c "cat >> /root/.ssh/authorized_keys" 16 | 17 | # Get the public key from test (which shares volume with server) and add it to jumphost authorized_keys 18 | docker exec test /bin/bash -c "cat /root/.ssh/id_rsa.pub" | docker exec -i jumphost /bin/bash -c "cat >> /root/.ssh/authorized_keys" 19 | 20 | # Configure SSH client settings for known hosts to avoid host key verification 21 | docker exec test /bin/bash -c "echo 'StrictHostKeyChecking no' >> /root/.ssh/config && echo 'UserKnownHostsFile /dev/null' >> /root/.ssh/config" 22 | docker exec jumphost /bin/bash -c "echo 'StrictHostKeyChecking no' >> /root/.ssh/config && echo 'UserKnownHostsFile /dev/null' >> /root/.ssh/config" 23 | docker exec server /bin/bash -c "echo 'StrictHostKeyChecking no' >> /root/.ssh/config && echo 'UserKnownHostsFile /dev/null' >> /root/.ssh/config" 24 | 25 | # Install rsync on all containers 26 | docker exec test /bin/bash -c "apt-get -y update && apt-get -y install rsync" 27 | docker exec jumphost /bin/bash -c "apt-get -y update && apt-get -y install rsync" 28 | docker exec server /bin/bash -c "apt-get -y update && apt-get -y install rsync" 29 | -------------------------------------------------------------------------------- /ci/ssh_rsync.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | cd ./ci/ssh 5 | docker compose pull 6 | ./start-ssh.sh 7 | cd - 8 | 9 | # install rsync 10 | docker exec server /bin/bash -c "apt-get -y update && apt-get -y install rsync" 11 | docker exec test /bin/bash -c "apt-get -y update && apt-get -y install rsync" 12 | 13 | docker exec test /bin/bash -c "cd /dpdispatcher && pip install uv && uv pip install --system .[test] coverage && coverage run --source=./dpdispatcher -m unittest -v && coverage report" 14 | docker exec --env-file <(env | grep -e GITHUB -e CODECOV) test /bin/bash -c "cd /dpdispatcher && curl -Os 
https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && ./codecov" 15 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "tests" 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | threshold: 100% 8 | patch: 9 | default: 10 | threshold: 100% 11 | -------------------------------------------------------------------------------- /conda/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | channel_sources: 2 | - defaults 3 | - conda-forge 4 | channel_targets: 5 | - deepmodeling 6 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "dpdispatcher" %} 2 | {% set version = environ.get('GIT_DESCRIBE_TAG').lstrip('v') %} 3 | 4 | package: 5 | name: {{ name|lower }} 6 | version: {{ version }} 7 | 8 | source: 9 | git_url: https://github.com/deepmodeling/dpdispatcher 10 | # git_rev: {{ version }} 11 | 12 | build: 13 | number: 0 14 | noarch: python 15 | script: python -m pip install --no-deps --ignore-installed . 16 | 17 | requirements: 18 | build: 19 | - git 20 | host: 21 | - git 22 | - python >=3.6 23 | - pip 24 | - setuptools_scm 25 | - dargs 26 | - paramiko 27 | - requests 28 | - tqdm 29 | 30 | run: 31 | - python >=3.6 32 | - dargs 33 | - paramiko 34 | - requests 35 | - tqdm 36 | 37 | test: 38 | imports: 39 | - dpdispatcher 40 | 41 | about: 42 | home: https://github.com/deepmodeling/dpdispatcher 43 | license: LGPL-3.0 44 | license_family: LGPL 45 | license_file: LICENSE 46 | doc_url: https://github.com/deepmodeling/dpdispatcher 47 | dev_url: https://github.com/deepmodeling/dpdispatcher 48 | 49 | extra: 50 | recipe-maintainers: 51 | - felix5572 52 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | # sphinx build folder 2 | _build 3 | 4 | # Compiled source # 5 | ################### 6 | *.com 7 | *.class 8 | *.dll 9 | *.exe 10 | *.o 11 | *.so 12 | 13 | # Packages # 14 | ############ 15 | # it's better to unpack these files and commit the raw source 16 | # git has its own built in compression methods 17 | *.7z 18 | *.dmg 19 | *.gz 20 | *.iso 21 | *.jar 22 | *.rar 23 | *.tar 24 | *.zip 25 | 26 | # Logs and databases # 27 | ###################### 28 | *.log 29 | *.sql 30 | *.sqlite 31 | 32 | # OS generated files # 33 | ###################### 34 | .DS_Store? 35 | ehthumbs.db 36 | Icon? 37 | Thumbs.db 38 | 39 | # Editor backup files # 40 | ####################### 41 | *~ 42 | # generated automatically 43 | api/ 44 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/cli.rst: -------------------------------------------------------------------------------- 1 | .. _cli: 2 | 3 | Command line interface 4 | ====================== 5 | 6 | .. argparse:: 7 | :module: dpdispatcher.dpdisp 8 | :func: main_parser 9 | :prog: dpdisp 10 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | from datetime import date 16 | 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = "DPDispatcher" 23 | copyright = f"2020-{date.today().year}, Deep Modeling" 24 | author = "DeepModeling" 25 | 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | "deepmodeling_sphinx", 34 | "dargs.sphinx", 35 | "myst_parser", 36 | "sphinx_book_theme", 37 | "sphinx.ext.viewcode", 38 | "sphinx.ext.intersphinx", 39 | "numpydoc", 40 | "sphinx.ext.autosummary", 41 | "sphinxarg.ext", 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ["_templates"] 46 | 47 | # List of patterns, relative to source directory, that match files and 48 | # directories to ignore when looking for source files. 49 | # This pattern also affects html_static_path and html_extra_path. 50 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 51 | 52 | 53 | # -- Options for HTML output ------------------------------------------------- 54 | 55 | # The theme to use for HTML and HTML Help pages. See the documentation for 56 | # a list of builtin themes. 57 | # 58 | html_theme = "sphinx_book_theme" 59 | 60 | # Add any paths that contain custom static files (such as style sheets) here, 61 | # relative to this directory. They are copied after the builtin static files, 62 | # so a file named "default.css" will overwrite the builtin "default.css". 
63 | html_static_path = ["_static"] 64 | html_css_files = [] 65 | 66 | autodoc_default_flags = ["members"] 67 | autosummary_generate = True 68 | master_doc = "index" 69 | 70 | 71 | def run_apidoc(_): 72 | from sphinx.ext.apidoc import main 73 | 74 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 75 | cur_dir = os.path.abspath(os.path.dirname(__file__)) 76 | module = os.path.join(cur_dir, "..", "dpdispatcher") 77 | main( 78 | [ 79 | "-M", 80 | "--tocfile", 81 | "api", 82 | "-H", 83 | "DPDispatcher API", 84 | "-o", 85 | os.path.join(cur_dir, "api"), 86 | module, 87 | "--force", 88 | ] 89 | ) 90 | 91 | 92 | def setup(app): 93 | app.connect("builder-inited", run_apidoc) 94 | 95 | 96 | intersphinx_mapping = { 97 | "python": ("https://docs.python.org/", None), 98 | "dargs": ("https://docs.deepmodeling.com/projects/dargs/en/latest/", None), 99 | } 100 | 101 | myst_enable_extensions = [ 102 | "colon_fence", 103 | ] 104 | -------------------------------------------------------------------------------- /doc/credits.rst: -------------------------------------------------------------------------------- 1 | Authors 2 | ======= 3 | 4 | .. git-shortlog-authors:: 5 | -------------------------------------------------------------------------------- /doc/env.md: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | 3 | When launching a job, dpdispatcher sets the following environment variables according to the resources, in addition to user-defined environment variables: 4 | 5 | :::{envvar} DPDISPATCHER_NUMBER_NODE 6 | 7 | The number of nodes required for each job. 8 | 9 | ::: 10 | 11 | :::{envvar} DPDISPATCHER_CPU_PER_NODE 12 | 13 | CPU numbers of each node assigned to each job. 14 | 15 | ::: 16 | 17 | :::{envvar} DPDISPATCHER_GPU_PER_NODE 18 | 19 | GPU numbers of each node assigned to each job. 20 | 21 | ::: 22 | 23 | :::{envvar} DPDISPATCHER_QUEUE_NAME 24 | 25 | The queue name of batch job scheduler system. 26 | 27 | ::: 28 | 29 | :::{envvar} DPDISPATCHER_GROUP_SIZE 30 | 31 | The number of tasks in a job. 0 means infinity. 32 | 33 | ::: 34 | 35 | These environment variables can be used in the {dargs:argument}`command `, for example, `mpirun -n ${DPDISPATCHER_CPU_PER_NODE} xx.run`. 36 | -------------------------------------------------------------------------------- /doc/examples/expanse.md: -------------------------------------------------------------------------------- 1 | # Running the DeePMD-kit on the Expanse cluster 2 | 3 | [Expanse](https://www.sdsc.edu/support/user_guides/expanse.html) is a cluster operated by the San Diego Supercomputer Center. Here we provide an example to run jobs on the expanse. 4 | 5 | The machine parameters are provided below. Expanse uses the SLURM workload manager for job scheduling. {ref}`remote_root ` has been created in advance. It's worth metioned that we do not recommend to use the password, so [SSH keys](https://www.ssh.com/academy/ssh/key) are used instead to improve security. 6 | 7 | ```{literalinclude} ../../examples/machine/expanse.json 8 | :language: json 9 | :linenos: 10 | ``` 11 | 12 | Expanse's standard compute nodes are each powered by two 64-core AMD EPYC 7742 processors and contain 256 GB of DDR4 memory. Here, we request one node with 32 cores and 16 GB memory from the `shared` partition. Expanse does not support `--gres=gpu:0` command, so we use {ref}`custom_gpu_line ` to customize the statement. 
13 | 14 | ```{literalinclude} ../../examples/resources/expanse_cpu.json 15 | :language: json 16 | :linenos: 17 | ``` 18 | 19 | The following task parameter runs a DeePMD-kit task, forwarding an input file and backwarding graph files. Here, the data set will be used among all the tasks, so it is not included in the {ref}`forward_files `. Instead, it should be included in the submission's {ref}`forward_common_files `. 20 | 21 | ```{literalinclude} ../../examples/task/deepmd-kit.json 22 | :language: json 23 | :linenos: 24 | ``` 25 | -------------------------------------------------------------------------------- /doc/examples/g16.md: -------------------------------------------------------------------------------- 1 | # Running Gaussian 16 with failure allowed 2 | 3 | Typically, a task will retry three times if the exit code is not zero. Sometimes, one may want to allow a non-zero exit code. For example, when running a large number of Gaussian 16 single-point calculation tasks, some of the Gaussian 16 tasks may throw SCF errors and return a non-zero code. One can append `||:` to the command: 4 | 5 | ```{literalinclude} ../../examples/task/g16.json 6 | :language: json 7 | :linenos: 8 | ``` 9 | 10 | This ensures the task always returns a zero exit code. 11 | -------------------------------------------------------------------------------- /doc/examples/shell.md: -------------------------------------------------------------------------------- 1 | # Running multiple MD tasks on a GPU workstation 2 | 3 | In this example, we are going to show how to run multiple MD tasks on a GPU workstation. This workstation does not have any job scheduling software installed, so we will use `Shell` as {ref}`batch_type `. 4 | 5 | ```{literalinclude} ../../examples/machine/mandu.json 6 | :language: json 7 | :linenos: 8 | ``` 9 | 10 | The workstation has 48 CPU cores and 8 RTX 3090 cards. Here we hope each card runs 6 tasks at the same time, as each task does not consume too many GPU resources. Thus, {ref}`strategy/if_cuda_multi_devices ` is set to `true` and {ref}`para_deg ` is set to 6. 11 | 12 | ```{literalinclude} ../../examples/resources/mandu.json 13 | :language: json 14 | :linenos: 15 | ``` 16 | 17 | Note that {ref}`group_size ` should be set to `0` (which means infinity) to ensure there is only one job and avoid running multiple jobs at the same time. 18 | -------------------------------------------------------------------------------- /doc/examples/template.md: -------------------------------------------------------------------------------- 1 | # Customizing the submission script header 2 | 3 | When submitting jobs to some clusters, such as the [Tiger Cluster](https://researchcomputing.princeton.edu/systems/tiger) at Princeton University, the Slurm header is quite different from the standard one. In this case, DPDispatcher allows users to customize the templates by setting {dargs:argument}`strategy/customized_script_header_template_file ` to a template file: 4 | 5 | ```{literalinclude} ../../examples/resources/tiger.json 6 | :language: json 7 | :linenos: 8 | ``` 9 | 10 | `template.slurm` is the template file, where {meth}`str.format` is used to format the template with [Resources Parameters](resources): 11 | 12 | ```{literalinclude} ../../examples/resources/template.slurm 13 | :linenos: 14 | ``` 15 | 16 | See [Python Format String Syntax](https://docs.python.org/3/library/string.html#formatstrings) for how to insert parameters inside the template.
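As a minimal sketch of the formatting step described above: the header template is rendered with `str.format`, so each placeholder must match a Resources parameter name. The template text and values below are illustrative assumptions for demonstration only, not the contents of `examples/resources/template.slurm`.

```python
# Minimal, self-contained sketch of how a customized script header template is
# rendered with str.format (illustrative only; the real template.slurm differs).
template = """#!/bin/bash
#SBATCH --nodes={number_node}
#SBATCH --ntasks-per-node={cpu_per_node}
#SBATCH --partition={queue_name}
"""

# Placeholder names correspond to Resources parameters such as number_node,
# cpu_per_node, and queue_name; the values here are made-up examples.
header = template.format(number_node=1, cpu_per_node=32, queue_name="cpu")
print(header)
```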
17 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. deepmd-kit documentation master file, created by 2 | sphinx-quickstart on Sat Nov 21 18:36:24 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | DPDispatcher's documentation 7 | ====================================== 8 | 9 | DPDispatcher is a Python package used to generate job input scripts for HPC (High Performance Computing) scheduler systems (Slurm/PBS/LSF/JH_UniScheduler/dpcloudserver), submit these scripts to HPC systems, and poke them until they finish. 10 | 11 | DPDispatcher will monitor (poke) until these jobs finish and download the result files (if these jobs are running on remote systems connected by SSH). 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: Contents: 16 | 17 | 18 | install 19 | getting-started 20 | context 21 | batch 22 | machine 23 | resources 24 | task 25 | env 26 | run 27 | cli 28 | api/api 29 | 30 | .. toctree:: 31 | :caption: Examples 32 | :glob: 33 | 34 | examples/expanse 35 | examples/g16 36 | examples/shell 37 | examples/template 38 | 39 | .. toctree:: 40 | :caption: Project details 41 | :glob: 42 | 43 | credits 44 | 45 | Indices and tables 46 | ================== 47 | 48 | * :ref:`genindex` 49 | * :ref:`modindex` 50 | * :ref:`search` 51 | -------------------------------------------------------------------------------- /doc/install.md: -------------------------------------------------------------------------------- 1 | # Install DPDispatcher 2 | 3 | DPDispatcher can be installed by `pip`: 4 | 5 | ```bash 6 | pip install dpdispatcher 7 | ``` 8 | 9 | To add [Bohrium](https://bohrium.dp.tech/) support, execute 10 | 11 | ```bash 12 | pip install dpdispatcher[bohrium] 13 | ``` 14 | -------------------------------------------------------------------------------- /doc/machine.rst: -------------------------------------------------------------------------------- 1 | Machine parameters 2 | ====================================== 3 | .. note:: 4 | One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_ online or hosted using the :ref:`command line interface ` :code:`dpdisp gui`. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file. 5 | 6 | .. dargs:: 7 | :module: dpdispatcher.arginfo 8 | :func: machine_dargs 9 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/pep723.rst: -------------------------------------------------------------------------------- 1 | .. dargs:: 2 | :module: dpdispatcher.run 3 | :func: pep723_args 4 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | .[docs] 2 | -------------------------------------------------------------------------------- /doc/resources.rst: -------------------------------------------------------------------------------- 1 | Resources parameters 2 | ====================================== 3 | .. note:: 4 | One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_ online or hosted using the :ref:`command line interface ` :code:`dpdisp gui`. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file for. 5 | 6 | .. dargs:: 7 | :module: dpdispatcher.arginfo 8 | :func: resources_dargs 9 | -------------------------------------------------------------------------------- /doc/run.md: -------------------------------------------------------------------------------- 1 | # Run Python scripts 2 | 3 | DPDispatcher can be used to directly run a single Python script: 4 | 5 | ```sh 6 | dpdisp run script.py 7 | ``` 8 | 9 | The script must include [inline script metadata](https://packaging.python.org/en/latest/specifications/inline-script-metadata/) compliant with [PEP 723](https://peps.python.org/pep-0723/). 10 | An example of the script is shown below. 11 | 12 | ```{literalinclude} ../examples/dpdisp_run.py 13 | :language: py 14 | :linenos: 15 | ``` 16 | 17 | The PEP 723 metadata entries for `tool.dpdispatcher` are defined as follows: 18 | 19 | ```{eval-rst} 20 | .. include:: pep723.rst 21 | ``` 22 | -------------------------------------------------------------------------------- /doc/task.rst: -------------------------------------------------------------------------------- 1 | Task parameters 2 | ====================================== 3 | .. note:: 4 | One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_ online or hosted using the :ref:`command line interface ` :code:`dpdisp gui`. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file. 5 | 6 | .. 
dargs:: 7 | :module: dpdispatcher.arginfo 8 | :func: task_dargs 9 | -------------------------------------------------------------------------------- /dpdispatcher/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "DeepModeling Team" 2 | __copyright__ = "Copyright 2019-2023, DeepModeling" 3 | __status__ = "Production" 4 | try: 5 | from ._version import version as __version__ 6 | except ImportError: 7 | __version__ = "unknown" 8 | 9 | import dpdispatcher.contexts # noqa: F401 10 | import dpdispatcher.machines # noqa: F401 11 | from dpdispatcher.machine import Machine 12 | from dpdispatcher.submission import Job, Resources, Submission, Task 13 | 14 | __all__ = [ 15 | "__version__", 16 | "Machine", 17 | "Submission", 18 | "Task", 19 | "Job", 20 | "Resources", 21 | ] 22 | -------------------------------------------------------------------------------- /dpdispatcher/__main__.py: -------------------------------------------------------------------------------- 1 | """Package dp entry point.""" 2 | 3 | from dpdispatcher.dpdisp import ( 4 | main, 5 | ) 6 | 7 | if __name__ == "__main__": 8 | main() 9 | -------------------------------------------------------------------------------- /dpdispatcher/arginfo.py: -------------------------------------------------------------------------------- 1 | from dpdispatcher.machine import Machine 2 | from dpdispatcher.submission import Resources, Task 3 | 4 | resources_dargs = Resources.arginfo 5 | machine_dargs = Machine.arginfo 6 | task_dargs = Task.arginfo 7 | -------------------------------------------------------------------------------- /dpdispatcher/contexts/__init__.py: -------------------------------------------------------------------------------- 1 | """Contexts.""" 2 | 3 | import importlib 4 | from pathlib import Path 5 | 6 | PACKAGE_BASE = "dpdispatcher.contexts" 7 | NOT_LOADABLE = ("__init__.py",) 8 | 9 | for module_file in Path(__file__).parent.glob("*.py"): 10 | if module_file.name not in NOT_LOADABLE: 11 | module_name = f".{module_file.stem}" 12 | importlib.import_module(module_name, PACKAGE_BASE) 13 | -------------------------------------------------------------------------------- /dpdispatcher/dlog.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import warnings 5 | 6 | dlog = logging.getLogger("dpdispatcher") 7 | dlog.propagate = False 8 | dlog.setLevel(logging.INFO) 9 | cwd_logfile_path = os.path.join(os.getcwd(), "dpdispatcher.log") 10 | dlogf = logging.FileHandler(cwd_logfile_path, delay=True) 11 | try: 12 | dlog.addHandler(dlogf) 13 | dlog.info(f"LOG INIT:dpdispatcher log direct to {cwd_logfile_path}") 14 | except PermissionError: 15 | dlog.removeHandler(dlogf) 16 | warnings.warn( 17 | f"dump logfile dpdispatcher.log to {cwd_logfile_path} meet permission error. 
redirect the log to ~/dpdispatcher.log" 18 | ) 19 | dlogf = logging.FileHandler( 20 | os.path.join(os.path.expanduser("~"), "dpdispatcher.log"), delay=True 21 | ) 22 | dlog.addHandler(dlogf) 23 | dlog.info("LOG INIT:dpdispatcher log init at ~/dpdispatcher.log") 24 | 25 | dlogf_formatter = logging.Formatter("%(asctime)s - %(levelname)s : %(message)s") 26 | dlogf.setFormatter(dlogf_formatter) 27 | # dlog.addHandler(dlogf) 28 | 29 | dlog_stdout = logging.StreamHandler(sys.stdout) 30 | dlog_stdout.setFormatter(dlogf_formatter) 31 | dlog.addHandler(dlog_stdout) 32 | 33 | __all__ = [ 34 | "dlog", 35 | ] 36 | -------------------------------------------------------------------------------- /dpdispatcher/dpcloudserver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/dpdispatcher/dpcloudserver/__init__.py -------------------------------------------------------------------------------- /dpdispatcher/dpcloudserver/client.py: -------------------------------------------------------------------------------- 1 | """Provide backward compatibility with dflow.""" 2 | 3 | from dpdispatcher.utils.dpcloudserver.client import RequestInfoException 4 | 5 | __all__ = [ 6 | "RequestInfoException", 7 | ] 8 | -------------------------------------------------------------------------------- /dpdispatcher/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | """Entry points.""" 2 | -------------------------------------------------------------------------------- /dpdispatcher/entrypoints/gui.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: LGPL-3.0-or-later 2 | """DP-GUI entrypoint.""" 3 | 4 | 5 | def start_dpgui(*, port: int, bind_all: bool, **kwargs): 6 | """Host DP-GUI server. 7 | 8 | Parameters 9 | ---------- 10 | port : int 11 | The port to serve DP-GUI on. 12 | bind_all : bool 13 | Serve on all public interfaces. This will expose your DP-GUI instance 14 | to the network on both IPv4 and IPv6 (where available).
15 | **kwargs 16 | additional arguments 17 | 18 | Raises 19 | ------ 20 | ModuleNotFoundError 21 | The dpgui package is not installed 22 | """ 23 | try: 24 | from dpgui import ( 25 | start_dpgui, 26 | ) 27 | except ModuleNotFoundError as e: 28 | raise ModuleNotFoundError( 29 | "To use DP-GUI, please install the dpgui package:\npip install dpgui" 30 | ) from e 31 | start_dpgui(port=port, bind_all=bind_all) 32 | -------------------------------------------------------------------------------- /dpdispatcher/entrypoints/run.py: -------------------------------------------------------------------------------- 1 | """Run PEP 723 script.""" 2 | 3 | from dpdispatcher.run import run_pep723 4 | 5 | 6 | def run(*, filename: str): 7 | with open(filename) as f: 8 | script = f.read() 9 | run_pep723(script) 10 | -------------------------------------------------------------------------------- /dpdispatcher/entrypoints/submission.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from dpdispatcher.dlog import dlog 4 | from dpdispatcher.submission import Submission 5 | from dpdispatcher.utils.job_status import JobStatus 6 | from dpdispatcher.utils.record import record 7 | 8 | 9 | def handle_submission( 10 | *, 11 | submission_hash: str, 12 | download_terminated_log: bool = False, 13 | download_finished_task: bool = False, 14 | clean: bool = False, 15 | reset_fail_count: bool = False, 16 | ): 17 | """Handle terminated submission. 18 | 19 | Parameters 20 | ---------- 21 | submission_hash : str 22 | Submission hash to download. 23 | download_terminated_log : bool, optional 24 | Download log files of terminated tasks. 25 | download_finished_task : bool, optional 26 | Download finished tasks. 27 | clean : bool, optional 28 | Clean submission. 29 | reset_fail_count : bool, optional 30 | Reset fail count of all jobs to zero. 31 | 32 | Raises 33 | ------ 34 | ValueError 35 | At least one action should be specified. 
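    Examples
    --------
    A minimal sketch of calling this entry point directly from Python; the
    submission hash below is a hypothetical placeholder, not a real record.

    >>> handle_submission(
    ...     submission_hash="0123456789abcdef",
    ...     download_terminated_log=True,
    ... )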
36 | """ 37 | if ( 38 | int(download_terminated_log) 39 | + int(download_finished_task) 40 | + int(clean) 41 | + int(reset_fail_count) 42 | == 0 43 | ): 44 | raise ValueError("At least one action should be specified.") 45 | 46 | submission_file = record.get_submission(submission_hash) 47 | submission = Submission.submission_from_json(str(submission_file)) 48 | submission.belonging_tasks = [ 49 | task for job in submission.belonging_jobs for task in job.job_task_list 50 | ] 51 | # TODO: for unclear reason, the submission_hash may be changed 52 | submission.submission_hash = submission_hash 53 | submission.machine.context.bind_submission(submission) 54 | if reset_fail_count: 55 | for job in submission.belonging_jobs: 56 | job.fail_count = 0 57 | # save to remote and local 58 | submission.submission_to_json() 59 | record.write(submission) 60 | if int(download_terminated_log) + int(download_finished_task) + int(clean) == 0: 61 | # if only reset_fail_count, no need to update submission state (expensive) 62 | return 63 | submission.update_submission_state() 64 | submission.submission_to_json() 65 | record.write(submission) 66 | 67 | terminated_tasks = [] 68 | finished_tasks = [] 69 | for task in submission.belonging_tasks: 70 | task.get_task_state(submission.machine.context) 71 | if task.task_state == JobStatus.terminated: 72 | terminated_tasks.append(task) 73 | elif task.task_state == JobStatus.finished: 74 | finished_tasks.append(task) 75 | submission.belonging_tasks = [] 76 | 77 | if download_terminated_log: 78 | for task in terminated_tasks: 79 | task.backward_files = [task.outlog, task.errlog] 80 | submission.belonging_tasks += terminated_tasks 81 | if download_finished_task: 82 | submission.belonging_tasks += finished_tasks 83 | 84 | submission.download_jobs() 85 | 86 | if download_terminated_log: 87 | terminated_log_files = [] 88 | for task in terminated_tasks: 89 | assert submission.local_root is not None 90 | terminated_log_files.append( 91 | Path(submission.local_root) / task.task_work_path / task.outlog 92 | ) 93 | terminated_log_files.append( 94 | Path(submission.local_root) / task.task_work_path / task.errlog 95 | ) 96 | 97 | dlog.info( 98 | "Terminated logs are downloaded into:\n " 99 | + "\n ".join([str(f) for f in terminated_log_files]) 100 | ) 101 | 102 | if clean: 103 | submission.clean_jobs() 104 | -------------------------------------------------------------------------------- /dpdispatcher/machines/__init__.py: -------------------------------------------------------------------------------- 1 | """Machines.""" 2 | 3 | import importlib 4 | from pathlib import Path 5 | 6 | PACKAGE_BASE = "dpdispatcher.machines" 7 | NOT_LOADABLE = ("__init__.py",) 8 | 9 | for module_file in Path(__file__).parent.glob("*.py"): 10 | if module_file.name not in NOT_LOADABLE: 11 | module_name = f".{module_file.stem}" 12 | importlib.import_module(module_name, PACKAGE_BASE) 13 | -------------------------------------------------------------------------------- /dpdispatcher/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utils.""" 2 | -------------------------------------------------------------------------------- /dpdispatcher/utils/dpcloudserver/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import Client 2 | 3 | __all__ = ["Client"] 4 | -------------------------------------------------------------------------------- /dpdispatcher/utils/dpcloudserver/config.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | HTTP_TIME_OUT = 30 4 | 5 | API_HOST = os.environ.get("DPDISPATCHER_LEBESGUE_API_HOST", "https://bohrium.dp.tech/") 6 | API_LOGGER_STACK_INFO = os.environ.get("API_LOGGER_STACK_INFO", "") 7 | ALI_STS_ENDPOINT = os.environ.get( 8 | "DPDISPATCHER_LEBESGUE_ALI_STS_ENDPOINT", "http://oss-cn-shenzhen.aliyuncs.com" 9 | ) 10 | ALI_STS_BUCKET_NAME = os.environ.get( 11 | "DPDISPATCHER_LEBESGUE_ALI_STS_BUCKET_NAME", "dpcloudserver" 12 | ) 13 | ALI_OSS_BUCKET_URL = os.environ.get( 14 | "DPDISPATCHER_LEBESGUE_ALI_OSS_BUCKET_URL", 15 | "https://dpcloudserver.oss-cn-shenzhen.aliyuncs.com/", 16 | ) 17 | -------------------------------------------------------------------------------- /dpdispatcher/utils/dpcloudserver/retcode.py: -------------------------------------------------------------------------------- 1 | # 2开头的错误代码第二位代表错误等级 2 | # 0. 严重错误; 1. 普通错误; 2. 规则错误; 3. 一般信息; 4. 未知错误 3 | class RETCODE: 4 | OK = "0000" # 正常 5 | DBERR = "2000" # 数据库异常 6 | THIRDERR = "2001" # 第三方异常 7 | DATAERR = "2002" # 数据异常 8 | IOERR = "2003" # IO异常 9 | 10 | TOKENINVALID = "2100" # 登陆错误 11 | PARAMERR = "2101" # 参数错误 12 | USERERR = "2102" # 用户异常 13 | ROLEERR = "2103" # 权限错误 14 | PWDERR = "2104" # 密码错误 15 | VERIFYERR = "2105" # 验证错误 16 | 17 | REQERR = "2200" # 请求错误 18 | 19 | NODATA = "2300" # 无数据 20 | UNDERDEBUG = "2301" # debug模式下无法使用 21 | 22 | UNKOWNERR = "2400" # 未知错误 23 | -------------------------------------------------------------------------------- /dpdispatcher/utils/dpcloudserver/zip_file.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from zipfile import ZipFile 4 | 5 | # def zip_file_list(root_path, zip_filename, file_list=[]): 6 | # shutil.make_archive(base_name=zip_filename, 7 | # root_dir=root_path,) 8 | 9 | 10 | def zip_file_list(root_path, zip_filename, file_list=[]): 11 | out_zip_file = os.path.join(root_path, zip_filename) 12 | # print('debug: file_list', file_list) 13 | zip_obj = ZipFile(out_zip_file, "w") 14 | for f in file_list: 15 | matched_files = os.path.join(root_path, f) 16 | for ii in glob.glob(matched_files): 17 | # print('debug: matched_files:ii', ii) 18 | if os.path.isdir(ii): 19 | arcname = os.path.relpath(ii, start=root_path) 20 | zip_obj.write(ii, arcname) 21 | for root, dirs, files in os.walk(ii): 22 | for file in files: 23 | filename = os.path.join(root, file) 24 | arcname = os.path.relpath(filename, start=root_path) 25 | # print('debug: filename:arcname:root_path', filename, arcname, root_path) 26 | zip_obj.write(filename, arcname) 27 | else: 28 | arcname = os.path.relpath(ii, start=root_path) 29 | zip_obj.write(ii, arcname) 30 | zip_obj.close() 31 | return out_zip_file 32 | 33 | 34 | # def zip_files(root_path, out_file, selected=[]): 35 | # obj = ZipFile(out_file, "w") 36 | # # change /xxx/ to /xxx or xxx to /xxx and pop '' 37 | # for i in range(len(selected)): 38 | # if not selected[i]: 39 | # selected.pop(i) 40 | # continue 41 | 42 | # selected[i] = selected[i].strip() 43 | # if selected[i].endswith('/'): 44 | # selected[i] = selected[i][:-1] 45 | # if not selected[i].startswith('/'): 46 | # selected[i] = '/{}'.format(selected[i]) 47 | 48 | # for root, dirs, files in os.walk(root_path): 49 | # for item in files: 50 | # filename = os.path.join(root, item) 51 | # arcname = filename.replace(root_path,'') 52 | # if not is_selected(arcname, selected): 53 | # continue 54 | 55 | # obj.write(filename, arcname) 56 | # if not 
obj.filelist: 57 | # return 58 | 59 | # obj.close() 60 | 61 | 62 | # def is_selected(arcname, selected): 63 | # if not selected: 64 | # return True 65 | 66 | # arcdir = os.path.dirname(arcname) 67 | # for s in selected: 68 | # if arcname == s: 69 | # return True 70 | 71 | # if arcdir == s: 72 | # return True 73 | 74 | # if arcname.startswith(s + '/'): 75 | # return True 76 | 77 | # return False 78 | 79 | 80 | def unzip_file(zip_file, out_dir="./"): 81 | obj = ZipFile(zip_file, "r") 82 | for item in obj.namelist(): 83 | obj.extract(item, out_dir) 84 | -------------------------------------------------------------------------------- /dpdispatcher/utils/job_status.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | 3 | 4 | class JobStatus(IntEnum): 5 | unsubmitted = 1 6 | waiting = 2 7 | running = 3 8 | terminated = 4 9 | finished = 5 10 | completing = 6 11 | unknown = 100 12 | 13 | 14 | # def __str__(self): 15 | # return repr(self) 16 | -------------------------------------------------------------------------------- /dpdispatcher/utils/record.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import List 4 | 5 | 6 | class Record: 7 | """Record failed or canceled submissions.""" 8 | 9 | def __init__(self) -> None: 10 | self.record_directory = Path.home() / ".dpdispatcher" / "submission" 11 | self.record_directory.mkdir(parents=True, exist_ok=True) 12 | 13 | def get_submissions(self) -> List[str]: 14 | """Get all stored submission hashes. 15 | 16 | Returns 17 | ------- 18 | list[str] 19 | List of submission hashes. 20 | """ 21 | return [ 22 | f.stem 23 | for f in self.record_directory.iterdir() 24 | if (f.is_file() and f.suffix == ".json") 25 | ] 26 | 27 | def write(self, submission) -> Path: 28 | """Write submission data to file. 29 | 30 | Parameters 31 | ---------- 32 | submission : dpdispatcher.Submission 33 | Submission data. 34 | 35 | Returns 36 | ------- 37 | pathlib.Path 38 | Path to submission data. 39 | """ 40 | submission_path = self.record_directory / f"{submission.submission_hash}.json" 41 | submission_path.write_text(json.dumps(submission.serialize(), indent=2)) 42 | return submission_path 43 | 44 | def get_submission(self, hash: str, not_exist_ok: bool = False) -> Path: 45 | """Get submission data by hash. 46 | 47 | Parameters 48 | ---------- 49 | hash : str 50 | Hash of submission data. 51 | 52 | Returns 53 | ------- 54 | pathlib.Path 55 | Path to submission data. 56 | """ 57 | submission_file = self.record_directory / f"{hash}.json" 58 | if not not_exist_ok and not submission_file.is_file(): 59 | raise FileNotFoundError(f"Submission file not found: {submission_file}") 60 | return submission_file 61 | 62 | def remove(self, hash: str): 63 | """Remove submission data by hash. 64 | 65 | Call this method when the remote directory is cleaned. 66 | 67 | Parameters 68 | ---------- 69 | hash : str 70 | Hash of submission data. 
71 | """ 72 | path = self.get_submission(hash, not_exist_ok=True) 73 | if path.is_file(): 74 | path.unlink() 75 | 76 | 77 | # the record object can be globally used 78 | record = Record() 79 | __all__ = ["record"] 80 | -------------------------------------------------------------------------------- /examples/dpdisp_run.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # # dpdispatcher doesn't use `requires-python` and `dependencies` 3 | # requires-python = ">=3" 4 | # dependencies = [ 5 | # ] 6 | # [tool.dpdispatcher] 7 | # work_base = "./" 8 | # forward_common_files=[] 9 | # backward_common_files=[] 10 | # [tool.dpdispatcher.machine] 11 | # batch_type = "Shell" 12 | # local_root = "./" 13 | # context_type = "LazyLocalContext" 14 | # [tool.dpdispatcher.resources] 15 | # number_node = 1 16 | # cpu_per_node = 1 17 | # gpu_per_node = 0 18 | # group_size = 0 19 | # [[tool.dpdispatcher.task_list]] 20 | # # no need to contain the script filename 21 | # command = "python" 22 | # # can be a glob pattern 23 | # task_work_path = "./" 24 | # forward_files = [] 25 | # backward_files = ["log"] 26 | # /// 27 | 28 | print("hello world!") 29 | -------------------------------------------------------------------------------- /examples/machine/expanse.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_type": "Slurm", 3 | "local_root": "./", 4 | "remote_root": "/expanse/lustre/scratch/njzjz/temp_project/dpgen_workdir", 5 | "clean_asynchronously": true, 6 | "context_type": "SSHContext", 7 | "remote_profile": { 8 | "hostname": "login.expanse.sdsc.edu", 9 | "username": "njzjz", 10 | "port": 22 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /examples/machine/lazy_local.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_type": "Shell", 3 | "local_root": "./", 4 | "context_type": "LazyLocalContext" 5 | } 6 | -------------------------------------------------------------------------------- /examples/machine/mandu.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_type": "Shell", 3 | "local_root": "./", 4 | "remote_root": "/data2/jinzhe/dpgen_workdir", 5 | "clean_asynchronously": true, 6 | "context_type": "SSHContext", 7 | "remote_profile": { 8 | "hostname": "mandu.iqb.rutgers.edu", 9 | "username": "jz748", 10 | "port": 22 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /examples/machine/ssh_proxy_command.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_type": "Shell", 3 | "context_type": "SSHContext", 4 | "local_root": "./", 5 | "remote_root": "/home/user/work", 6 | "remote_profile": { 7 | "hostname": "internal-server.company.com", 8 | "username": "user", 9 | "port": 22, 10 | "key_filename": "~/.ssh/id_rsa", 11 | "proxy_command": "ssh -W internal-server.company.com:22 -i ~/.ssh/jump_key jumpuser@bastion.company.com" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/resources/expanse_cpu.json: -------------------------------------------------------------------------------- 1 | { 2 | "number_node": 1, 3 | "cpu_per_node": 1, 4 | "gpu_per_node": 0, 5 | "queue_name": "shared", 6 | "group_size": 1, 7 | "custom_flags": [ 8 | "#SBATCH -c 32", 9 | "#SBATCH --mem=16G", 10 | "#SBATCH --time=48:00:00", 
11 | "#SBATCH --account=rut149", 12 | "#SBATCH --requeue" 13 | ], 14 | "source_list": [ 15 | "activate /home/njzjz/deepmd-kit" 16 | ], 17 | "envs": { 18 | "OMP_NUM_THREADS": 4, 19 | "TF_INTRA_OP_PARALLELISM_THREADS": 4, 20 | "TF_INTER_OP_PARALLELISM_THREADS": 8, 21 | "DP_AUTO_PARALLELIZATION": 1 22 | }, 23 | "batch_type": "Slurm", 24 | "kwargs": { 25 | "custom_gpu_line": "#SBATCH --gpus=0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /examples/resources/mandu.json: -------------------------------------------------------------------------------- 1 | { 2 | "number_node": 1, 3 | "cpu_per_node": 48, 4 | "gpu_per_node": 8, 5 | "queue_name": "shell", 6 | "group_size": 0, 7 | "strategy": { 8 | "if_cuda_multi_devices": true 9 | }, 10 | "source_list": [ 11 | "activate /home/jz748/deepmd-kit" 12 | ], 13 | "envs": { 14 | "OMP_NUM_THREADS": 1, 15 | "TF_INTRA_OP_PARALLELISM_THREADS": 1, 16 | "TF_INTER_OP_PARALLELISM_THREADS": 1 17 | }, 18 | "para_deg": 6 19 | } 20 | -------------------------------------------------------------------------------- /examples/resources/template.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --parsable 3 | #SBATCH --nodes={number_node} 4 | #SBATCH --ntasks-per-node={cpu_per_node} 5 | #SBATCH --qos={kwargs[qos]} 6 | #SBATCH --time=01:02:00 7 | #SBATCH --mem-per-cpu=4G 8 | -------------------------------------------------------------------------------- /examples/resources/tiger.json: -------------------------------------------------------------------------------- 1 | { 2 | "number_node": 1, 3 | "cpu_per_node": 32, 4 | "kwargs":{ 5 | "qos": "tiger-vshort" 6 | }, 7 | "source_list": ["activate abacus_env"], 8 | "strategy": { 9 | "customized_script_header_template_file": "./template.slurm" 10 | }, 11 | "group_size": 2000 12 | } 13 | -------------------------------------------------------------------------------- /examples/task/deepmd-kit.json: -------------------------------------------------------------------------------- 1 | { 2 | "command": "dp train input.json && dp freeze && dp compress", 3 | "task_work_path": "model1/", 4 | "forward_files": [ 5 | "input.json" 6 | ], 7 | "backward_files": [ 8 | "frozen_model.pb", 9 | "frozen_model_compressed.pb" 10 | ], 11 | "outlog": "log", 12 | "errlog": "err" 13 | } 14 | -------------------------------------------------------------------------------- /examples/task/g16.json: -------------------------------------------------------------------------------- 1 | { 2 | "command": "g16 < input > output ||:", 3 | "task_work_path": "p1/", 4 | "forward_files": [ 5 | "input" 6 | ], 7 | "backward_files": [ 8 | "output" 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61", "setuptools_scm[toml]>=7"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "dpdispatcher" 7 | dynamic = ["version"] 8 | description = "Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish" 9 | authors = [ 10 | { name = "DeepModeling" }, 11 | ] 12 | license = { file = "LICENSE" } 13 | classifiers = [ 14 | "Programming Language :: Python :: 3.7", 15 | "Programming Language :: Python :: 3.8", 16 | "Programming Language :: Python :: 3.9", 17 | "Programming Language :: Python :: 
3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Operating System :: POSIX :: Linux", 20 | "Operating System :: MacOS :: MacOS X", 21 | "Operating System :: Microsoft :: Windows", 22 | "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", 23 | ] 24 | dependencies = [ 25 | 'paramiko', 26 | 'dargs>=0.4.1', 27 | 'requests', 28 | 'tqdm>=4.9.0', 29 | 'typing_extensions; python_version < "3.7"', 30 | 'pyyaml', 31 | 'tomli >= 1.1.0; python_version < "3.11"', 32 | ] 33 | requires-python = ">=3.7" 34 | readme = "README.md" 35 | keywords = ["dispatcher", "hpc", "slurm", "lsf", "pbs", "ssh", "jh_unischeduler"] 36 | 37 | [project.urls] 38 | Homepage = "https://github.com/deepmodeling/dpdispatcher" 39 | documentation = "https://docs.deepmodeling.com/projects/dpdispatcher" 40 | repository = "https://github.com/deepmodeling/dpdispatcher" 41 | 42 | [project.scripts] 43 | dpdisp = "dpdispatcher.dpdisp:main" 44 | 45 | [project.entry-points."dpgui"] 46 | "DPDispatcher Machine" = "dpdispatcher.arginfo:machine_dargs" 47 | "DPDispatcher Resources" = "dpdispatcher.arginfo:resources_dargs" 48 | "DPDispatcher Task" = "dpdispatcher.arginfo:task_dargs" 49 | 50 | [project.optional-dependencies] 51 | docs = [ 52 | 'sphinx', 53 | 'myst-parser', 54 | 'sphinx-book-theme', 55 | 'numpydoc', 56 | 'deepmodeling-sphinx>=0.3.0', 57 | 'dargs>=0.3.1', 58 | 'sphinx-argparse<0.5.0', 59 | ] 60 | cloudserver = ["oss2", "tqdm", "bohrium-sdk"] 61 | bohrium = ["oss2", "tqdm", "bohrium-sdk"] 62 | gui = [ 63 | "dpgui", 64 | ] 65 | test = [ 66 | "dpgui", 67 | ] 68 | 69 | [tool.setuptools.packages.find] 70 | include = ["dpdispatcher*"] 71 | 72 | [tool.setuptools_scm] 73 | write_to = "dpdispatcher/_version.py" 74 | 75 | [tool.pyright] 76 | include = ['dpdispatcher'] 77 | exclude = [ 78 | 'dpdispatcher/dpcloudserver/temp_test.py', 79 | 'dpdispatcher/_version.py', 80 | ] 81 | 82 | [tool.isort] 83 | profile = "black" 84 | 85 | [tool.ruff.lint] 86 | select = [ 87 | "E", # errors 88 | "F", # pyflakes 89 | "D", # pydocstyle 90 | "UP", # pyupgrade 91 | "I", # isort 92 | ] 93 | ignore = [ 94 | "E501", # line too long 95 | "F841", # local variable is assigned to but never used 96 | "E741", # ambiguous variable name 97 | "E402", # module level import not at top of file 98 | "D413", # missing blank line after last section 99 | "D416", # section name should end with a colon 100 | "D203", # 1 blank line required before class docstring 101 | "D107", # missing docstring in __init__ 102 | "D213", # multi-line docstring summary should start at the second line 103 | "D100", # TODO: missing docstring in public module 104 | "D101", # TODO: missing docstring in public class 105 | "D102", # TODO: missing docstring in public method 106 | "D103", # TODO: missing docstring in public function 107 | "D104", # TODO: missing docstring in public package 108 | "D105", # TODO: missing docstring in magic method 109 | "D205", # 1 blank line required between summary line and description 110 | "D401", # TODO: first line should be in imperative mood 111 | "D404", # TODO: first word of the docstring should not be This 112 | ] 113 | 114 | [tool.ruff.lint.pydocstyle] 115 | convention = "numpy" 116 | -------------------------------------------------------------------------------- /scripts/script_gen_dargs_docs.py: -------------------------------------------------------------------------------- 1 | # %% 2 | # import sys, os 3 | # sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..' 
))) 4 | # import dpdispatcher 5 | from dpdispatcher.machine import Machine 6 | from dpdispatcher.submission import Resources, Task 7 | 8 | # %% 9 | resources_dargs_doc = Resources.arginfo().gen_doc() 10 | with open("../doc/resources-auto.rst", "w") as f: 11 | # print(resources_dargs_doc) 12 | f.write(resources_dargs_doc) 13 | 14 | machine_dargs_doc = Machine.arginfo().gen_doc() 15 | with open("../doc/machine-auto.rst", "w") as f: 16 | f.write(machine_dargs_doc) 17 | 18 | task_dargs_doc = Task.arginfo().gen_doc() 19 | with open("../doc/task-auto.rst", "w") as f: 20 | f.write(task_dargs_doc) 21 | 22 | 23 | # %% 24 | -------------------------------------------------------------------------------- /scripts/script_gen_dargs_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from dargs import ArgumentEncoder 4 | 5 | from dpdispatcher import Machine, Resources, Task 6 | 7 | resources_dargs = Resources.arginfo() 8 | with open("dpdispatcher-resources.json", "w") as f: 9 | json.dump(resources_dargs, f, cls=ArgumentEncoder) 10 | 11 | machine_dargs = Machine.arginfo() 12 | with open("dpdispatcher-machine.json", "w") as f: 13 | json.dump(machine_dargs, f, cls=ArgumentEncoder) 14 | 15 | task_dargs = Task.arginfo() 16 | with open("dpdispatcher-task.json", "w") as f: 17 | json.dump(task_dargs, f, cls=ArgumentEncoder) 18 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | out.txt -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/tests/__init__.py -------------------------------------------------------------------------------- /tests/batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_type": "pbs", 3 | "context_type": "lazy_local", 4 | "local_root" : "./test_batch_object", 5 | "remote_root" : "./tmp/" 6 | } -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import pathlib 4 | import sys 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 7 | 8 | import dpdispatcher # noqa: F401 9 | from dpdispatcher.base_context import BaseContext # noqa: F401 10 | from dpdispatcher.contexts.hdfs_context import HDFSContext # noqa: F401 11 | from dpdispatcher.contexts.lazy_local_context import LazyLocalContext # noqa: F401 12 | from dpdispatcher.contexts.local_context import LocalContext # noqa: F401 13 | from dpdispatcher.contexts.ssh_context import SSHContext, SSHSession # noqa: F401 14 | 15 | # test backward compatibility with dflow 16 | from dpdispatcher.dpcloudserver.client import RequestInfoException as _ # noqa: F401 17 | from dpdispatcher.entrypoints.run import run # noqa: F401 18 | from dpdispatcher.entrypoints.submission import handle_submission # noqa: F401 19 | from dpdispatcher.machine import Machine # noqa: F401 20 | from dpdispatcher.machines.distributed_shell import DistributedShell # noqa: F401 21 | from dpdispatcher.machines.dp_cloud_server import Lebesgue # noqa: F401 22 | from dpdispatcher.machines.JH_UniScheduler import 
JH_UniScheduler # noqa: F401 23 | from dpdispatcher.machines.lsf import LSF # noqa: F401 24 | from dpdispatcher.machines.pbs import PBS # noqa: F401 25 | from dpdispatcher.machines.shell import Shell # noqa: F401 26 | from dpdispatcher.machines.slurm import Slurm # noqa: F401 27 | from dpdispatcher.submission import Job, Resources, Submission, Task # noqa: F401 28 | from dpdispatcher.utils.hdfs_cli import HDFS # noqa: F401 29 | from dpdispatcher.utils.job_status import JobStatus # noqa: F401 30 | from dpdispatcher.utils.record import record # noqa: F401 31 | from dpdispatcher.utils.utils import RetrySignal, retry # noqa: F401 32 | 33 | 34 | def setUpModule(): 35 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 36 | 37 | 38 | def get_file_md5(file_path): 39 | return hashlib.md5(pathlib.Path(file_path).read_bytes()).hexdigest() 40 | -------------------------------------------------------------------------------- /tests/debug_test_class_submission_init.py: -------------------------------------------------------------------------------- 1 | # import os,sys,json,glob,shutil,uuid,time 2 | # import unittest 3 | # from unittest.mock import MagicMock, patch, PropertyMock 4 | 5 | # sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 6 | # __package__ = 'tests' 7 | # from .context import LocalSession 8 | # from .context import LocalContext 9 | # from .context import PBS 10 | # from .context import JobStatus 11 | # from .context import Submission, Job, Task, Resources 12 | from sample_class import SampleClass 13 | 14 | # print('in', SampleClass.get_sample_empty_submission()) 15 | 16 | 17 | empty_submission = SampleClass.get_sample_empty_submission() 18 | task = SampleClass.get_sample_task() 19 | # print('TestSubmissionInit.test_reigister_task:self.empty_submission.belonging_tasks', empty_submission.belonging_tasks) 20 | empty_submission.register_task(task=task) 21 | # print('7890809', SampleClass.get_sample_empty_submission().belonging_tasks) 22 | empty_submission.register_task(task=task) 23 | # print('1441198', SampleClass.get_sample_empty_submission().belonging_tasks) 24 | # self.assertEqual([task], empty_submission.belonging_tasks) 25 | # print('out', SampleClass.get_sample_empty_submission()) 26 | # print('TestSubmissionInit.test_register_task_list:task_list', task_list) 27 | # empty_submission = SampleClass.get_sample_empty_submission() 28 | # task_list = SampleClass.get_sample_task_list() 29 | # empty_submission.register_task_list(task_list=task_list) 30 | # self.empty_submission.register_task_list(task_list=task_list) 31 | # self.assertEqual(task_list, empty_submission.belonging_tasks) 32 | 33 | # def tesk_generate_jobs(self): 34 | # task_list = SampleClass.get_sample_task_list() 35 | # self.submission.register_task_list(task_list=task_list) 36 | # self.submission.generate_jobs() 37 | # task1, task2, task3, task4 = task_list 38 | # task_ll = [job.job_task_list for job in self.submission.belonging_jobs] 39 | # self.assertEqual([[task3, task2], [task4, task1]], task_ll) 40 | -------------------------------------------------------------------------------- /tests/devel_test_JH_UniScheduler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | from dpdispatcher.machine import Machine 6 | from dpdispatcher.submission import Resources, Submission, Task 7 | 8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 9 | 10 | # task_need_resources has no 
effect 11 | with open("jsons/machine_jh_unischeduler.json") as f: 12 | mdata = json.load(f) 13 | 14 | machine = Machine.load_from_dict(mdata["machine"]) 15 | resources = Resources.load_from_dict(mdata["resources"]) 16 | 17 | submission = Submission( 18 | work_base="0_md/", 19 | machine=machine, 20 | resources=resources, 21 | forward_common_files=["graph.pb"], 22 | backward_common_files=[], 23 | ) 24 | 25 | task1 = Task( 26 | command="lmp -i input.lammps", 27 | task_work_path="bct-1/", 28 | forward_files=["conf.lmp", "input.lammps"], 29 | backward_files=["log.lammps"], 30 | ) 31 | task2 = Task( 32 | command="lmp -i input.lammps", 33 | task_work_path="bct-2/", 34 | forward_files=["conf.lmp", "input.lammps"], 35 | backward_files=["log.lammps"], 36 | ) 37 | task3 = Task( 38 | command="lmp -i input.lammps", 39 | task_work_path="bct-3/", 40 | forward_files=["conf.lmp", "input.lammps"], 41 | backward_files=["log.lammps"], 42 | ) 43 | task4 = Task( 44 | command="lmp -i input.lammps", 45 | task_work_path="bct-4/", 46 | forward_files=["conf.lmp", "input.lammps"], 47 | backward_files=["log.lammps"], 48 | ) 49 | submission.register_task_list( 50 | [ 51 | task1, 52 | task2, 53 | task3, 54 | task4, 55 | ] 56 | ) 57 | submission.run_submission(clean=True) 58 | -------------------------------------------------------------------------------- /tests/devel_test_ali_ehpc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | # from .sample_class import SampleClass 7 | # from .context import Machine 8 | # from .context import Resources 9 | # from dpdispatcher.local_context import LocalSession 10 | # from dpdispatcher.local_context import LocalContext 11 | # from dpdispatcher.lazy_local_context import LazyLocalContext 12 | 13 | from dpdispatcher.machine import Machine 14 | from dpdispatcher.submission import Resources 15 | from tests.sample_class import SampleClass 16 | 17 | # from dpdispatcher.pbs import PBS 18 | 19 | # local_session = LocalSession({'work_path':'test_work_path/'}) 20 | # local_context = LocalContext(local_root='test_pbs_dir/', work_profile=local_session) 21 | 22 | # lazy_local_context = LazyLocalContext(local_root='/home/fengbo/10_dpdispatcher/dpdispatcher/tests/test_pbs_dir', work_profile=None) 23 | 24 | # ssh_session = SSHSession(hostname='39.106.84.25', remote_root='/home/fengbo/dp_remote', username='fengbo') 25 | # ssh_context = SSHContext(local_root='test_slurm_dir', ssh_session=ssh_session) 26 | 27 | # pbs = PBS(context=ssh_context) 28 | # pbs = PBS(context=local_context) 29 | # pbs = PBS(context=lazy_local_context) 30 | 31 | submission = SampleClass.get_sample_submission() 32 | # pbs = SampleClass.get_sample_pbs_local_context() 33 | # slurm = SampleClass.get_sample_slurm_local_context() 34 | 35 | with open("jsons/machine_ali_ehpc.json") as f: 36 | compute_dict = json.load(f) 37 | 38 | machine = Machine.load_from_dict(compute_dict["machine"]) 39 | resources = Resources.load_from_dict(compute_dict["resources"]) 40 | 41 | submission.resouces = resources 42 | submission.bind_machine(machine=machine) 43 | # submission.run_submission() 44 | # submission.run_submission(exit_on_submit=True) 45 | submission.run_submission() 46 | 47 | 48 | # resources = Resources(number_node=1, cpu_per_node=4, gpu_per_node=1, queue_name="V100_8_32", group_size=2, if_cuda_multi_devices=True) 49 | # submission = Submission(work_base='0_md/', 
resources=resources, forward_common_files=['graph.pb'], backward_common_files=[]) #, batch=PBS) 50 | # task1 = Task(command='lmp_serial -i input.lammps', task_work_path='bct-1/', forward_files=['conf.lmp', 'input.lammps'], backward_files=['log.lammps'], task_need_resources=1) 51 | # task2 = Task(command='lmp_serial -i input.lammps', task_work_path='bct-2/', forward_files=['conf.lmp', 'input.lammps'], backward_files=['log.lammps'], task_need_resources=0.25) 52 | # task3 = Task(command='lmp_serial -i input.lammps', task_work_path='bct-3/', forward_files=['conf.lmp', 'input.lammps'], backward_files=['log.lammps'], task_need_resources=0.25) 53 | # task4 = Task(command='lmp_serial -i input.lammps', task_work_path='bct-4/', forward_files=['conf.lmp', 'input.lammps'], backward_files=['log.lammps'], task_need_resources=0.5) 54 | # submission.register_task_list([task1, task2, task3, task4, ]) 55 | # submission.generate_jobs() 56 | # submission.bind_batch(batch=pbs) 57 | # for job in submission.belonging_jobs: 58 | # job.job_to_json() 59 | # print('111', submission) 60 | # submission2 = Submission.recover_jobs_from_json('./jr.json') 61 | # print('222', submission2) 62 | # print(submission==submission2) 63 | 64 | # submission1.dump_jobs_fo_json() 65 | # submission2 = Submission.submission_from_json('jsons/submission.json') 66 | # print(677, submission==submission2) 67 | # print(submission1.belonging_jobs) 68 | # print(local_context) 69 | -------------------------------------------------------------------------------- /tests/devel_test_dp_cloud_server.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | 7 | # from dpdispatcher.batch_object import BatchObject 8 | from dpdispatcher.machine import Machine 9 | from dpdispatcher.submission import Resources, Submission, Task 10 | 11 | # from dpdispatcher.slurm import SlurmResources, Slurm 12 | 13 | # local_session = LocalSession({'work_path':'temp2'}) 14 | # local_context = LocalContext(local_root='test_slurm_dir/', work_profile=local_session) 15 | # lazy_local_context = LazyLocalContext(local_root='./') 16 | 17 | 18 | # machine_dict = dict(hostname='localhost', remote_root='/home/dp/dpdispatcher/tests/temp2', username='dp') 19 | # ssh_session = SSHSession(**machine_dict) 20 | # ssh_session = SSHSession(hostname='8.131.233.55', remote_root='/home/dp/dp_remote', username='dp') 21 | # ssh_context = SSHContext(local_root='test_slurm_dir', ssh_session=ssh_session) 22 | # slurm = Slurm(context=ssh_context) 23 | # slurm = Slurm(context=lazy_local_context) 24 | 25 | # resources = Resources(number_node=1, cpu_per_node=4, gpu_per_node=0, queue_name="1 * NVIDIA P100", group_size=4) 26 | # slurm_sbatch_dict={'mem': '10G', 'cpus_per_task':1, 'time': "120:0:0"} 27 | # slurm_resources = SlurmResources(resources=resources, slurm_sbatch_dict=slurm_sbatch_dict) 28 | 29 | 30 | # dp_cloud_server_context = DpCloudServerContext( 31 | # local_root='test_context_dir/', 32 | # username='yfb222333', 33 | # password='yfb222333') 34 | # dp_cloud_server = DpCloudServer(context=dp_cloud_server_context) 35 | # with open('test_dp_cloud_server.json', 'r') as f: 36 | # jdata = json.load(f) 37 | with open("jsons/machine_dp_cloud_server.json") as f: 38 | compute_dict = json.load(f) 39 | 40 | machine = Machine.load_from_dict(compute_dict["machine"]) 41 | resources = Resources.load_from_dict(compute_dict["resources"]) 42 | 
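# Illustrative sketch (commented out, not executed by this script): Machine and
# Resources can also be built from inline dicts instead of a JSON file, e.g. a
# local Shell machine mirroring examples/machine/lazy_local.json:
# machine = Machine.load_from_dict(
#     {"batch_type": "Shell", "context_type": "LazyLocalContext", "local_root": "./"}
# )
# resources = Resources.load_from_dict(
#     {"number_node": 1, "cpu_per_node": 1, "gpu_per_node": 0, "group_size": 0}
# )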
43 | task1 = Task( 44 | command="lmp -i input.lammps", 45 | task_work_path="bct-1/", 46 | forward_files=["conf.lmp", "input.lammps"], 47 | backward_files=["log.lammps"], 48 | ) 49 | task2 = Task( 50 | command="lmp -i input.lammps", 51 | task_work_path="bct-2/", 52 | forward_files=["conf.lmp", "input.lammps"], 53 | backward_files=["log.lammps"], 54 | ) 55 | task3 = Task( 56 | command="lmp -i input.lammps", 57 | task_work_path="bct-3/", 58 | forward_files=["conf.lmp", "input.lammps"], 59 | backward_files=["log.lammps"], 60 | ) 61 | task4 = Task( 62 | command="lmp -i input.lammps", 63 | task_work_path="bct-4/", 64 | forward_files=["conf.lmp", "input.lammps"], 65 | backward_files=["log.lammps"], 66 | ) 67 | task_list = [ 68 | task1, 69 | task2, 70 | task3, 71 | task4, 72 | ] 73 | 74 | submission = Submission( 75 | work_base="0_md/", 76 | machine=machine, 77 | resources=resources, 78 | forward_common_files=["graph.pb"], 79 | backward_common_files=[], 80 | task_list=task_list, 81 | ) 82 | 83 | submission.run_submission() 84 | -------------------------------------------------------------------------------- /tests/devel_test_lazy_ali_ehpc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | 6 | from dpdispatcher.lazy_local_context import LazyLocalContext 7 | from dpdispatcher.pbs import PBS 8 | from dpdispatcher.submission import Resources, Submission, Task 9 | 10 | # local_session = LocalSession({'work_path':'temp2'}) 11 | # local_context = LocalContext(local_root='temp1/0_md', work_profile=local_session) 12 | lazy_local_context = LazyLocalContext( 13 | local_root="/home/fengbo/10_dpdispatcher/dpdispatcher/tests/temp3", 14 | work_profile=None, 15 | ) 16 | 17 | # pbs = PBS(context=local_context) 18 | pbs = PBS(context=lazy_local_context) 19 | 20 | resources = Resources( 21 | number_node=1, 22 | cpu_per_node=4, 23 | gpu_per_node=1, 24 | queue_name="V100_8_32", 25 | group_size=4, 26 | if_cuda_multi_devices=True, 27 | ) 28 | submission = Submission(work_base="0_md", resources=resources) 29 | task1 = Task(command="lmp_serial -i input.lammps", task_work_path="bct-1") 30 | task2 = Task(command="lmp_serial -i input.lammps", task_work_path="bct-2") 31 | task3 = Task(command="lmp_serial -i input.lammps", task_work_path="bct-3") 32 | task4 = Task(command="lmp_serial -i input.lammps", task_work_path="bct-4") 33 | submission.register_task_list( 34 | [ 35 | task1, 36 | task2, 37 | task3, 38 | task4, 39 | ] 40 | ) 41 | submission.generate_jobs() 42 | submission.bind_batch(batch=pbs) 43 | # for job in submission.belonging_jobs: 44 | # job.job_to_json() 45 | # print('111', submission) 46 | # submission2 = Submission.recover_jobs_from_json('./jr.json') 47 | # print('222', submission2) 48 | # print(submission==submission2) 49 | submission.run_submission() 50 | 51 | # submission1.dump_jobs_fo_json() 52 | # submission2 = Submission.submission_from_json('jsons/submission.json') 53 | # print(677, submission==submission2) 54 | # print(submission1.belonging_jobs) 55 | # print(local_context) 56 | -------------------------------------------------------------------------------- /tests/devel_test_lsf.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | from dpdispatcher.machine import Machine 6 | from dpdispatcher.submission import Resources, Submission, Task 7 | 8 | sys.path.insert(0, 
os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 9 | 10 | # task_need_resources has no effect 11 | with open("jsons/machine_lsf.json") as f: 12 | mdata = json.load(f) 13 | 14 | machine = Machine.load_from_dict(mdata["machine"]) 15 | resources = Resources.load_from_dict(mdata["resources"]) 16 | 17 | submission = Submission( 18 | work_base="0_md/", 19 | machine=machine, 20 | resources=resources, 21 | forward_common_files=["graph.pb"], 22 | backward_common_files=[], 23 | ) 24 | 25 | task1 = Task( 26 | command="lmp -i input.lammps", 27 | task_work_path="bct-1/", 28 | forward_files=["conf.lmp", "input.lammps"], 29 | backward_files=["log.lammps"], 30 | ) 31 | task2 = Task( 32 | command="lmp -i input.lammps", 33 | task_work_path="bct-2/", 34 | forward_files=["conf.lmp", "input.lammps"], 35 | backward_files=["log.lammps"], 36 | ) 37 | task3 = Task( 38 | command="lmp -i input.lammps", 39 | task_work_path="bct-3/", 40 | forward_files=["conf.lmp", "input.lammps"], 41 | backward_files=["log.lammps"], 42 | ) 43 | task4 = Task( 44 | command="lmp -i input.lammps", 45 | task_work_path="bct-4/", 46 | forward_files=["conf.lmp", "input.lammps"], 47 | backward_files=["log.lammps"], 48 | ) 49 | submission.register_task_list( 50 | [ 51 | task1, 52 | task2, 53 | task3, 54 | task4, 55 | ] 56 | ) 57 | submission.run_submission(clean=True) 58 | -------------------------------------------------------------------------------- /tests/devel_test_shell.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | 6 | from dpdispatcher.local_context import LocalContext, LocalSession 7 | 8 | # from dpdispatcher.pbs import PBS 9 | from dpdispatcher.shell import Shell 10 | from dpdispatcher.submission import Resources, Submission, Task 11 | 12 | local_session = LocalSession({"work_path": "temp2"}) 13 | local_context = LocalContext(local_root="test_shell_dir/", work_profile=local_session) 14 | # lazy_local_context = LazyLocalContext(local_root='/home/fengbo/10_dpdispatcher/dpdispatcher/tests/temp3/0_md', work_profile=None) 15 | shell = Shell(context=local_context) 16 | # pbs = PBS(context=lazy_local_context) 17 | 18 | resources = Resources( 19 | number_node=1, cpu_per_node=4, gpu_per_node=1, queue_name="V100_8_32", group_size=4 20 | ) 21 | submission = Submission( 22 | work_base="0_md", 23 | resources=resources, 24 | forward_common_files=["graph.pb"], 25 | backward_common_files=["submission.json"], 26 | ) # , batch=PBS) 27 | task1 = Task( 28 | command="lmp_serial -i input.lammps", 29 | task_work_path="bct-1", 30 | forward_files=["conf.lmp", "input.lammps"], 31 | backward_files=["log.lammps"], 32 | task_need_resources=1, 33 | ) 34 | task2 = Task( 35 | command="lmp_serial -i input.lammps", 36 | task_work_path="bct-2", 37 | forward_files=["conf.lmp", "input.lammps"], 38 | backward_files=["log.lammps"], 39 | task_need_resources=0.25, 40 | ) 41 | task3 = Task( 42 | command="lmp_serial -i input.lammps", 43 | task_work_path="bct-3", 44 | forward_files=["conf.lmp", "input.lammps"], 45 | backward_files=["log.lammps"], 46 | task_need_resources=0.25, 47 | ) 48 | task4 = Task( 49 | command="lmp_serial -i input.lammps", 50 | task_work_path="bct-4", 51 | forward_files=["conf.lmp", "input.lammps"], 52 | backward_files=["log.lammps"], 53 | task_need_resources=0.5, 54 | ) 55 | submission.register_task_list( 56 | [ 57 | task1, 58 | task2, 59 | task3, 60 | task4, 61 | ] 62 | ) 63 | 
submission.generate_jobs() 64 | submission.bind_batch(batch=shell) 65 | # for job in submission.belonging_jobs: 66 | # job.job_to_json() 67 | # print('111', submission) 68 | # submission2 = Submission.recover_jobs_from_json('./jr.json') 69 | # print('222', submission2) 70 | # print(submission==submission2) 71 | submission.run_submission() 72 | 73 | # submission1.dump_jobs_fo_json() 74 | # submission2 = Submission.submission_from_json('jsons/submission.json') 75 | # print(677, submission==submission2) 76 | # print(submission1.belonging_jobs) 77 | # print(local_context) 78 | -------------------------------------------------------------------------------- /tests/devel_test_slurm.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | # from .context import dpdispatcher 7 | # from dpdispatcher.local_context import LocalContext 8 | from dpdispatcher.machine import Machine 9 | from dpdispatcher.submission import Resources, Submission, Task 10 | 11 | # from dpdispatcher.ssh_context import SSHContext 12 | 13 | 14 | # from dpdispatcher.submission import 15 | # from dpdispatcher.slurm import Slurm 16 | 17 | # local_session = LocalSession({'work_path':'temp2'}) 18 | # local_context = LocalContext(local_root='test_slurm_dir/', work_profile=local_session) 19 | # lazy_local_context = LazyLocalContext(local_root='test_slurm_dir/') 20 | 21 | 22 | # machine_dict = dict(hostname='localhost', remote_root='/home/dp/dpdispatcher/tests/temp2', username='dp') 23 | # ssh_session = SSHSession(**machine_dict) 24 | # ssh_session = SSHSession(hostname='8.131.233.55', remote_root='/home/dp/dp_remote', username='dp') 25 | # ssh_context = SSHContext(local_root='test_slurm_dir', ssh_session=ssh_session) 26 | # slurm = Slurm(context=ssh_context) 27 | # slurm = Slurm(context=lazy_local_context) 28 | 29 | # resources = Resources(number_node=1, cpu_per_node=4, gpu_per_node=2, queue_name="GPU_2080Ti", group_size=4, 30 | # custom_flags=['#SBATCH --exclude=2080ti000,2080ti001,2080ti002,2080ti004,2080ti005,2080ti006'], 31 | # para_deg=2, 32 | # strategy={"if_cuda_multi_devices":True}) 33 | # slurm_sbatch_dict={'mem': '10G', 'cpus_per_task':1, 'time': "120:0:0"} 34 | # slurm_resources = SlurmResources(resources=resources, slurm_sbatch_dict=slurm_sbatch_dict) 35 | 36 | with open("jsons/machine_slurm.json") as f: 37 | mdata = json.load(f) 38 | 39 | machine = Machine.load_from_dict(mdata["machine"]) 40 | resources = Resources.load_from_dict(mdata["resources"]) 41 | 42 | submission = Submission( 43 | work_base="0_md/", 44 | machine=machine, 45 | resources=resources, 46 | forward_common_files=["graph.pb"], 47 | backward_common_files=[], 48 | ) # , batch=PBS) 49 | task1 = Task( 50 | command="lmp -i input.lammps", 51 | task_work_path="bct-1/", 52 | forward_files=["conf.lmp", "input.lammps"], 53 | backward_files=["log.lammps"], 54 | ) 55 | task2 = Task( 56 | command="lmp -i input.lammps", 57 | task_work_path="bct-2/", 58 | forward_files=["conf.lmp", "input.lammps"], 59 | backward_files=["log.lammps"], 60 | ) 61 | task3 = Task( 62 | command="lmp -i input.lammps", 63 | task_work_path="bct-3/", 64 | forward_files=["conf.lmp", "input.lammps"], 65 | backward_files=["log.lammps"], 66 | ) 67 | task4 = Task( 68 | command="lmp -i input.lammps", 69 | task_work_path="bct-4/", 70 | forward_files=["conf.lmp", "input.lammps"], 71 | backward_files=["log.lammps"], 72 | ) 73 | 
submission.register_task_list( 74 | [ 75 | task1, 76 | task2, 77 | task3, 78 | task4, 79 | ] 80 | ) 81 | submission.run_submission(clean=True) 82 | -------------------------------------------------------------------------------- /tests/devel_test_ssh_ali_ehpc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | 7 | # from dpdispatcher.local_context import LocalSession 8 | # from dpdispatcher.local_context import LocalContext 9 | 10 | # from dpdispatcher.batch import Batch 11 | # from dpdispatcher.pbs import PBS 12 | from dpdispatcher.batch_object import BatchObject 13 | from dpdispatcher.submission import Resources, Submission, Task 14 | 15 | with open("ssh_machine.json") as f: 16 | jdata = json.load(f) 17 | 18 | batch = BatchObject(jdata=jdata) 19 | # local_session = LocalSession({'work_path':'temp2'}) 20 | # local_context = LocalContext(local_root='temp1/', work_profile=local_session) 21 | # lazy_local_context = LazyLocalContext(local_root='/home/fengbo/10_dpdispatcher/dpdispatcher/tests/temp3/0_md', work_profile=None) 22 | # pbs = PBS(context=lazy_local_context) 23 | # ssh_session = SSHSession(hostname='39.106.84.25', remote_root='/home/fengbo/dp_remote', username='fengbo') 24 | # ssh_context = SSHContext(local_root='test_slurm_dir', ssh_session=ssh_session) 25 | # jdata = 26 | 27 | # pbs = PBS(context=ssh_context) 28 | 29 | resources = Resources( 30 | number_node=1, cpu_per_node=4, gpu_per_node=1, queue_name="V100_8_32", group_size=4 31 | ) 32 | submission = Submission( 33 | work_base="test_pbs_dir/0_md", 34 | resources=resources, 35 | forward_common_files=["graph.pb"], 36 | backward_common_files=[], 37 | ) # , batch=PBS) 38 | task1 = Task( 39 | command="lmp_serial -i input.lammps", 40 | task_work_path="bct-1", 41 | forward_files=["conf.lmp", "input.lammps"], 42 | backward_files=["log.lammps"], 43 | ) 44 | task2 = Task( 45 | command="lmp_serial -i input.lammps", 46 | task_work_path="bct-2", 47 | forward_files=["conf.lmp", "input.lammps"], 48 | backward_files=["log.lammps"], 49 | ) 50 | task3 = Task( 51 | command="lmp_serial -i input.lammps", 52 | task_work_path="bct-3", 53 | forward_files=["conf.lmp", "input.lammps"], 54 | backward_files=["log.lammps"], 55 | ) 56 | task4 = Task( 57 | command="lmp_serial -i input.lammps", 58 | task_work_path="bct-4", 59 | forward_files=["conf.lmp", "input.lammps"], 60 | backward_files=["log.lammps"], 61 | ) 62 | submission.register_task_list( 63 | [ 64 | task1, 65 | task2, 66 | task3, 67 | task4, 68 | ] 69 | ) 70 | submission.generate_jobs() 71 | submission.bind_batch(batch=batch) 72 | # for job in submission.belonging_jobs: 73 | # job.job_to_json() 74 | # print('111', submission) 75 | # submission2 = Submission.recover_jobs_from_json('./jr.json') 76 | # print('222', submission2) 77 | # print(submission==submission2) 78 | submission.run_submission() 79 | 80 | # submission1.dump_jobs_fo_json() 81 | # submission2 = Submission.submission_from_json('jsons/submission.json') 82 | # print(677, submission==submission2) 83 | # print(submission1.belonging_jobs) 84 | # print(local_context) 85 | -------------------------------------------------------------------------------- /tests/graph.pb: -------------------------------------------------------------------------------- 1 | # mock file 2 | # origin file Sn SCAN functional label:continue-2/000 by yfb222333@gmail.com; github:felix5572 3 | # model 
version 1.2: wget https://deepmd-kit.oss-cn-beijing.aliyuncs.com/graph.pb 4 | # model version 2.0: wget https://deepmd-kit.oss-cn-beijing.aliyuncs.com/graph_Sn_convert_from_1.2_to_2.0.pb 5 | -------------------------------------------------------------------------------- /tests/hello_world.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # # dpdispatcher doesn't use `requires-python` and `dependencies` 3 | # requires-python = ">=3" 4 | # dependencies = [ 5 | # ] 6 | # [tool.dpdispatcher] 7 | # work_base = "./" 8 | # forward_common_files=[] 9 | # backward_common_files=[] 10 | # [tool.dpdispatcher.machine] 11 | # batch_type = "Shell" 12 | # local_root = "./" 13 | # context_type = "LazyLocalContext" 14 | # [tool.dpdispatcher.resources] 15 | # number_node = 1 16 | # cpu_per_node = 1 17 | # gpu_per_node = 0 18 | # group_size = 0 19 | # [[tool.dpdispatcher.task_list]] 20 | # # no need to contain the script filename 21 | # command = "python" 22 | # # can be a glob pattern 23 | # task_work_path = "./" 24 | # forward_files = [] 25 | # backward_files = ["log"] 26 | # /// 27 | 28 | print("hello world!") 29 | -------------------------------------------------------------------------------- /tests/jsons/job.json: -------------------------------------------------------------------------------- 1 | { 2 | "bc1a7297489e921034ced5036cb23ef9daf7b681": { 3 | "job_task_list": [ 4 | { 5 | "command": "lmp -i input.lammps", 6 | "task_work_path": "bct-3/", 7 | "forward_files": [ 8 | "conf.lmp", 9 | "input.lammps" 10 | ], 11 | "backward_files": [ 12 | "log.lammps" 13 | ], 14 | "outlog": "log", 15 | "errlog": "err" 16 | }, 17 | { 18 | "command": "lmp -i input.lammps", 19 | "task_work_path": "bct-2/", 20 | "forward_files": [ 21 | "conf.lmp", 22 | "input.lammps" 23 | ], 24 | "backward_files": [ 25 | "log.lammps" 26 | ], 27 | "outlog": "log", 28 | "errlog": "err" 29 | } 30 | ], 31 | "resources": { 32 | "number_node": 1, 33 | "cpu_per_node": 4, 34 | "gpu_per_node": 1, 35 | "queue_name": "T4_4_15", 36 | "group_size": 2, 37 | "custom_flags": [], 38 | "strategy": { 39 | "if_cuda_multi_devices": false 40 | }, 41 | "para_deg": 1, 42 | "module_unload_list": [], 43 | "module_list": [], 44 | "source_list": [], 45 | "envs": {}, 46 | "kwargs": {} 47 | }, 48 | "job_state": null, 49 | "job_id": "", 50 | "fail_count": 0 51 | } 52 | } -------------------------------------------------------------------------------- /tests/jsons/machine.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "PBS", 4 | "context_type": "LocalContext", 5 | "local_root" : "./", 6 | "remote_root" : "./", 7 | "remote_profile": {} 8 | }, 9 | "resources":{ 10 | "number_node": 1, 11 | "cpu_per_node": 4, 12 | "gpu_per_node": 1, 13 | "queue_name": "T4_4_15", 14 | "group_size": 5 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /tests/jsons/machine_JH_UniScheduler.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine": { 3 | "batch_type": "JH_UniScheduler", 4 | "context_type": "local", 5 | "local_root": "./", 6 | "remote_root": "/data/home/wangsimin/machine_learning/dpgen/task/test/dpgen_example/run1" 7 | }, 8 | "resources":{ 9 | "number_node": 1, 10 | "cpu_per_node": 4, 11 | "gpu_per_node": 1, 12 | "queue_name": "gpu", 13 | "group_size": 4, 14 | "source_list": ["/public/software/deepmd-kit/bin/activate /public/software/deepmd-kit"] 15 | 
} 16 | } 17 | -------------------------------------------------------------------------------- /tests/jsons/machine_ali_ehpc.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "PBS", 4 | "context_type": "SSHContext", 5 | "local_root": "./test_context_dir", 6 | "remote_root": "/home/yuanfengbo/work_path_dpdispatcher_test", 7 | "remote_profile": { 8 | "hostname": "39.103.186.143", 9 | "username": "yuanfengbo" 10 | } 11 | }, 12 | "resources":{ 13 | "number_node": 1, 14 | "cpu_per_node": 8, 15 | "gpu_per_node": 1, 16 | "queue_name": "V100_8_32", 17 | "group_size": 5 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/jsons/machine_center.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_lazy_pbs":{ 3 | "batch_type": "pbs", 4 | "context_type": "lazy_local", 5 | "local_root": "./" 6 | }, 7 | "batch_local_slurm":{ 8 | "batch_type": "slurm", 9 | "context_type": "local", 10 | "local_root": "./", 11 | "remote_root": "./" 12 | }, 13 | "batch_ehpc_ssh_pbs":{ 14 | "batch_type":"pbs", 15 | "context_type":"ssh", 16 | "local_root": "./", 17 | "remote_root": "~/dpdispatcher_work_dir", 18 | "hostname": "39.106.xx.xxx", 19 | "username": "user1" 20 | }, 21 | "resources_gpu":{ 22 | "number_node": 1, 23 | "cpu_per_node": 4, 24 | "gpu_per_node": 1, 25 | "queue_name": "T4_4_14", 26 | "group_size": 5 27 | }, 28 | "resources_cpu":{ 29 | "number_node": 1, 30 | "cpu_per_node": 8, 31 | "gpu_per_node": 0, 32 | "queue_name": "C32_64", 33 | "group_size": 5 34 | } 35 | } -------------------------------------------------------------------------------- /tests/jsons/machine_diffenert.json: -------------------------------------------------------------------------------- 1 | { 2 | "train":{ 3 | "batch":{ 4 | "batch_type": "pbs", 5 | "context_type": "lazy_local", 6 | "local_root": "./" 7 | }, 8 | "resources":{ 9 | "number_node": 1, 10 | "cpu_per_node": 4, 11 | "gpu_per_node": 1, 12 | "queue_name": "T4_4_14", 13 | "group_size": 5 14 | } 15 | }, 16 | "md":{ 17 | "batch":{ 18 | "batch_type": "Shell", 19 | "context_type": "lazy_local", 20 | "local_root": "./" 21 | }, 22 | "resources":{ 23 | "number_node": null, 24 | "cpu_per_node": null, 25 | "gpu_per_node": 0, 26 | "queue_name": null, 27 | "group_size": 5 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /tests/jsons/machine_dp_cloud_server.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "DpCloudServer", 4 | "context_type": "DpCloudServerContext", 5 | "local_root" : "./test_context_dir/", 6 | "remote_profile":{ 7 | "username": "yfb222333", 8 | "password": "", 9 | "input_data":{ 10 | "job_type": "indicate", 11 | "log_file": "dp_cloud_server.log", 12 | "command": "", 13 | "backward_files": [], 14 | "job_name": "dpgen_20210604_job", 15 | "machine": { 16 | "platform": "ali", 17 | "resources": { 18 | "gpu_type": "1 * NVIDIA P100", 19 | "cpu_num": 4, 20 | "mem_limit": 28, 21 | "disk_size": 100, 22 | "region": "cn-beijing", 23 | "time_limit": "2:00:00", 24 | "image_name": "yfb-deepmd-kit-1.2.4-cuda10" 25 | } 26 | }, 27 | "job_resources": null 28 | } 29 | } 30 | }, 31 | "resources": { 32 | "number_node": 1, 33 | "cpu_per_node": 4, 34 | "gpu_per_node": 1, 35 | "queue_name": "GPU", 36 | "group_size": 5 37 | } 38 | } 39 | 
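
For orientation, a minimal sketch of how machine/resources/task fixtures like the ones above are consumed through the dpdispatcher API exercised by the tests later in this section (tests/test_group_size.py, tests/test_argcheck.py, tests/test_import_classes.py). Paths are relative to the tests/ directory, only calls that appear in those tests are used, and the sketch stops before any actual submission:

import json

from dpdispatcher import Machine, Resources, Submission, Task

with open("jsons/machine.json") as f:
    data = json.load(f)
with open("jsons/task.json") as f:
    task_data = json.load(f)

# load_from_dict also normalizes values, e.g. context_type "local" -> "LocalContext"
# (see tests/test_argcheck.py)
machine = Machine.load_from_dict(data["machine"])
resources = Resources.load_from_dict(data["resources"])
task = Task.load_from_dict(task_data)

submission = Submission(".", machine, resources, task_list=[task])
# tasks are grouped into jobs according to group_size (see tests/test_group_size.py)
submission.generate_jobs()
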
-------------------------------------------------------------------------------- /tests/jsons/machine_fugaku.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine": { 3 | "batch_type": "fugaku", 4 | "context_type": "SSHContext", 5 | "local_root" : "./", 6 | "remote_profile": { 7 | "hostname": "login.****.jp", 8 | "key_filename":"/home/***/.ssh/***", 9 | "passphrase":"******", 10 | "username": "u*****" 11 | }, 12 | "remote_root": "/vol*****/data/****" 13 | }, 14 | "resources": { 15 | "number_node": 1, 16 | "cpu_per_node": 48, 17 | "source_list": [""], 18 | "queue_name": "small", 19 | "group_size": 1, 20 | "custom_flags" : ["#PJM -L \"elapse=4:00:00\"", 21 | "#PJM -x PJM_LLIO_GFSCACHE=/vol0004", 22 | "#PJM -g hp******"] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tests/jsons/machine_if_cuda_multi_devices.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "Shell", 4 | "context_type": "LocalContext", 5 | "local_root" : "test_if_cuda_multi_devices/", 6 | "remote_root" : "tmp_if_cuda_multi_devices/", 7 | "remote_profile":{} 8 | }, 9 | "resources":{ 10 | "number_node": 1, 11 | "cpu_per_node": 4, 12 | "gpu_per_node": 4, 13 | "queue_name": "GPU_2080Ti", 14 | "group_size": 16, 15 | "para_deg": 2, 16 | "strategy": { 17 | "if_cuda_multi_devices": true 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /tests/jsons/machine_lazy_local_jh_unischeduler.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine": { 3 | "batch_type": "JH_UniScheduler", 4 | "context_type": "LazyLocalContext", 5 | "local_root": "./test_jh_unischeduler" 6 | }, 7 | "resources": { 8 | "number_node": 1, 9 | "cpu_per_node": 4, 10 | "queue_name": "gpu", 11 | "gpu_per_node": 1, 12 | "group_size": 4, 13 | "strategy": { 14 | "if_cuda_multi_devices": false 15 | }, 16 | "source_list": ["./slurm_test.env"] 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /tests/jsons/machine_lazy_local_lsf.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "LSF", 4 | "context_type": "LazyLocalContext", 5 | "local_root": "./test_lsf_dir" 6 | }, 7 | "resources":{ 8 | "number_node": 1, 9 | "cpu_per_node": 4, 10 | "gpu_per_node": 1, 11 | "queue_name": "gpu", 12 | "group_size": 4, 13 | "custom_flags": [ 14 | "#BSUB -R \"select[hname != g005]\"", 15 | "#BSUB -W 24:00" 16 | ], 17 | "strategy": { 18 | "if_cuda_multi_devices": false 19 | }, 20 | "para_deg": 1, 21 | "module_unload_list": [], 22 | "module_purge": true, 23 | "module_list": [ 24 | "use.own", 25 | "deepmd/1.3" 26 | ], 27 | "source_list": [ 28 | "/data/home/ypliu/scripts/avail_gpu.sh", 29 | "/data/home/ypliu/dprun/tf_envs.sh" 30 | ], 31 | "envs": {"DP_DISPATCHER_EXPORT": "test_foo_bar_baz"}, 32 | "prepend_script": [ 33 | "echo 'The summer you were there.'" 34 | ], 35 | "append_script": [ 36 | "echo 'shizuku'", 37 | "echo 'kaori'" 38 | ], 39 | "kwargs": { 40 | "gpu_usage": true, 41 | "gpu_new_syntax": true, 42 | "gpu_exclusive": false 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /tests/jsons/machine_lazy_local_slurm.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | 
"batch_type": "Slurm", 4 | "context_type": "LazyLocalContext", 5 | "local_root": "./test_slurm_dir" 6 | }, 7 | "resources":{ 8 | "number_node": 1, 9 | "cpu_per_node": 4, 10 | "gpu_per_node": 2, 11 | "queue_name": "GPU_2080Ti", 12 | "group_size": 4, 13 | "custom_flags": ["#SBATCH --nice=100", "#SBATCH --time=24:00:00"], 14 | "strategy": { 15 | "if_cuda_multi_devices": true 16 | }, 17 | "para_deg": 2, 18 | "module_unload_list": ["singularity"], 19 | "module_list": ["singularity/3.0.0"], 20 | "source_list": ["./slurm_test.env"], 21 | "envs": {"DP_DISPATCHER_EXPORT_VAR": "test_foo_bar_baz"}, 22 | "custom_gpu_line":"#SBATCH --gres=gpu:2080Ti:2" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tests/jsons/machine_lazylocal_shell.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "Shell", 4 | "context_type": "LazyLocalContext", 5 | "local_root": "./test_shell_trival_dir" 6 | }, 7 | "resources":{ 8 | "number_node": 1, 9 | "cpu_per_node": 4, 10 | "gpu_per_node": 0, 11 | "queue_name": "CPU", 12 | "group_size": 2 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tests/jsons/machine_local_fugaku.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine": { 3 | "batch_type": "fugaku", 4 | "context_type": "local", 5 | "local_root" : "./", 6 | "remote_root": "./" 7 | }, 8 | "resources": { 9 | "number_node": 1, 10 | "cpu_per_node": 48, 11 | "source_list": [""], 12 | "queue_name": "small", 13 | "group_size": 1, 14 | "custom_flags" : ["#PJM -L \"elapse=4:00:00\"", 15 | "#PJM -x PJM_LLIO_GFSCACHE=/vol0004", 16 | "#PJM -g hp******"] 17 | } 18 | } -------------------------------------------------------------------------------- /tests/jsons/machine_local_shell.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "Shell", 4 | "context_type": "LocalContext", 5 | "local_root": "./test_shell_trival_dir", 6 | "remote_root": "./tmp_shell_trival_dir" 7 | }, 8 | "resources":{ 9 | "number_node": 1, 10 | "cpu_per_node": 4, 11 | "gpu_per_node": 0, 12 | "queue_name": "CPU", 13 | "group_size": 2 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /tests/jsons/machine_lsf.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "LSF", 4 | "context_type": "SSHContext", 5 | "local_root": "./test_lsf_dir", 6 | "remote_root": "/data/home/ypliu/dptasks", 7 | "remote_profile": { 8 | "hostname": "123.45.78.99", 9 | "port": 56789, 10 | "username": "ypliu" 11 | } 12 | }, 13 | "resources":{ 14 | "number_node": 1, 15 | "cpu_per_node": 4, 16 | "gpu_per_node": 1, 17 | "queue_name": "gpu", 18 | "group_size": 4, 19 | "custom_flags": [ 20 | "#BSUB -R \"select[hname != g005]\"", 21 | "#BSUB -W 24:00" 22 | ], 23 | "strategy": { 24 | "if_cuda_multi_devices": false 25 | }, 26 | "para_deg": 1, 27 | "module_unload_list": [], 28 | "module_list": [ 29 | "use.own", 30 | "deepmd/1.3" 31 | ], 32 | "source_list": [ 33 | "/data/home/ypliu/scripts/avail_gpu.sh", 34 | "/data/home/ypliu/dprun/tf_envs.sh" 35 | ], 36 | "envs": {"DP_DISPATCHER_EXPORT": "test_foo_bar_baz"}, 37 | "kwargs": { 38 | "gpu_usage": true, 39 | "gpu_new_syntax": true 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- 
/tests/jsons/machine_openapi.json: -------------------------------------------------------------------------------- 1 | { 2 | "command": "lmp -i in.lammps -v restart 0", 3 | "machine": { 4 | "batch_type": "OpenAPI", 5 | "context_type": "OpenAPIContext", 6 | "remote_profile": { 7 | "project_id": 154, 8 | "command": "lmp -i in.lammps -v restart 0", 9 | "job_name": "dpgen_lammps_job", 10 | "machine_type":"c4_m15_1 * NVIDIA T4", 11 | "image_address":"registry.dp.tech/dev/test/deepmd-kit:2.2.1-cuda11.6" 12 | } 13 | }, 14 | "resources": { 15 | "group_size": 10 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tests/jsons/machine_slurm.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine":{ 3 | "batch_type": "Slurm", 4 | "context_type": "SSHContext", 5 | "local_root": "./test_context_dir", 6 | "remote_root": "/home/fengbo/work_path_dpdispatcher_test", 7 | "remote_profile": { 8 | "hostname": "xxx.200.xxx.59", 9 | "username": "fengbo" 10 | } 11 | }, 12 | "resources":{ 13 | "number_node": 1, 14 | "cpu_per_node": 4, 15 | "gpu_per_node": 2, 16 | "queue_name": "GPU_2080Ti", 17 | "group_size": 4, 18 | "custom_flags": ["#SBATCH --nice=100", "#SBATCH --time=24:00:00"], 19 | "strategy": { 20 | "if_cuda_multi_devices": true 21 | }, 22 | "para_deg": 2, 23 | "module_unload_list": ["singularity"], 24 | "module_list": ["singularity/3.0.0"], 25 | "source_list": ["./slurm_test.env"], 26 | "envs": {"DP_DISPATCHER_EXPORT": "test_foo_bar_baz"}, 27 | "kwargs": {} 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /tests/jsons/machine_yarn.json: -------------------------------------------------------------------------------- 1 | { 2 | "machine": { 3 | "batch_type": "DistributedShell", 4 | "context_type": "HDFSContext", 5 | "local_root": "./test_hdfs_dir", 6 | "remote_root": "/user/jenny/md/local_test" 7 | }, 8 | "resources": { 9 | "number_node": 1, 10 | "allow_failure": true, 11 | "ratio_failue": 0.05, 12 | "cpu_per_node": 32, 13 | "gpu_per_node": 0, 14 | "with_mpi": true, 15 | "queue_name": "root.oryx_bigbang", 16 | "cluster": "oryx", 17 | "group_size": 1, 18 | "source_list": ["/opt/intel/oneapi/setvars.sh"], 19 | "kwargs": { 20 | "yarn_path": "/opt/tiger/yarn_deploy/hadoop/share/hadoop/yarn", 21 | "img_name": "hub.byted.org/md/dpgen_fp:v1", 22 | "mem_limit": 64 23 | }, 24 | "envs" : {"HADOOP_HOME" : "${HADOOP_HOME:-/opt/tiger/yarn_deploy/hadoop}", 25 | "JAVA_HOME": "${JAVA_HOME:-/opt/tiger/jdk/jdk1.8}", 26 | "LD_LIBRARY_PATH": "${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native:${JAVA_HOME}/jre/lib/amd64/server", 27 | "CLASSPATH": "`${HADOOP_HOME}/bin/hadoop classpath --glob`", 28 | "PATH": "${HADOOP_HOME}/bin:/opt/vasp.5.4.4/bin:${PATH}"} 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tests/jsons/resources.json: -------------------------------------------------------------------------------- 1 | { 2 | "number_node": 1, 3 | "cpu_per_node": 4, 4 | "gpu_per_node": 1, 5 | "queue_name": "T4_4_15", 6 | "group_size": 2, 7 | "custom_flags": [], 8 | "strategy": { 9 | "if_cuda_multi_devices": false 10 | }, 11 | "para_deg": 1, 12 | "module_unload_list": [], 13 | "module_list": [], 14 | "source_list": [], 15 | "envs": {}, 16 | "wait_time": 0, 17 | "kwargs": {} 18 | } -------------------------------------------------------------------------------- /tests/jsons/task.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "command": "lmp -i input.lammps", 3 | "task_work_path": "bct-1/", 4 | "forward_files": [ 5 | "conf.lmp", 6 | "input.lammps" 7 | ], 8 | "backward_files": [ 9 | "log.lammps" 10 | ], 11 | "outlog": "log", 12 | "errlog": "err" 13 | } -------------------------------------------------------------------------------- /tests/script_gen_json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import sys 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 7 | __package__ = "tests" 8 | # from .context import LocalContext 9 | 10 | from .sample_class import SampleClass 11 | 12 | task_dict = SampleClass.get_sample_task_dict() 13 | assert os.path.isfile("jsons/task.json") is False 14 | with open("jsons/task.json", "w") as f: 15 | json.dump(task_dict, f, indent=4) 16 | 17 | resources_dict = SampleClass.get_sample_resources_dict() 18 | assert os.path.isfile("jsons/resources.json") is False 19 | with open("jsons/resources.json", "w") as f: 20 | json.dump(resources_dict, f, indent=4) 21 | 22 | pbs = SampleClass.get_sample_pbs_local_context() 23 | submission = SampleClass.get_sample_submission() 24 | submission.bind_machine(machine=pbs) 25 | assert os.path.isfile("jsons/submission.json") is False 26 | with open("jsons/submission.json", "w") as f: 27 | json.dump(submission.serialize(), f, indent=4) 28 | 29 | job_dict = SampleClass.get_sample_job_dict() 30 | assert os.path.isfile("jsons/job.json") is False 31 | with open("jsons/job.json", "w") as f: 32 | json.dump(job_dict, f, indent=4) 33 | -------------------------------------------------------------------------------- /tests/slurm_test.env: -------------------------------------------------------------------------------- 1 | export DP_DPDISPATCHER_TEST_VAR="dpdispatcher_foo_bar" 2 | -------------------------------------------------------------------------------- /tests/test_argcheck.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | 8 | from .context import Machine, Resources, Task, setUpModule # noqa: F401 9 | 10 | 11 | class TestJob(unittest.TestCase): 12 | def test_machine_argcheck(self): 13 | norm_dict = Machine.load_from_dict( 14 | { 15 | "batch_type": "slurm", 16 | "context_type": "local", 17 | "local_root": "./", 18 | "remote_root": "/some/path", 19 | } 20 | ).serialize() 21 | expected_dict = { 22 | "batch_type": "Slurm", 23 | "context_type": "LocalContext", 24 | "local_root": "./", 25 | "remote_root": "/some/path", 26 | "remote_profile": { 27 | "symlink": True, 28 | }, 29 | "clean_asynchronously": False, 30 | } 31 | self.assertDictEqual(norm_dict, expected_dict) 32 | 33 | def test_resources_argcheck(self): 34 | norm_dict = Resources.load_from_dict( 35 | { 36 | "number_node": 1, 37 | "cpu_per_node": 2, 38 | "gpu_per_node": 0, 39 | "queue_name": "haha", 40 | "group_size": 1, 41 | "envs": { 42 | "aa": "bb", 43 | }, 44 | "kwargs": { 45 | "cc": True, 46 | }, 47 | } 48 | ).serialize() 49 | expected_dict = { 50 | "append_script": [], 51 | "cpu_per_node": 2, 52 | "custom_flags": [], 53 | "envs": {"aa": "bb"}, 54 | "gpu_per_node": 0, 55 | "group_size": 1, 56 | "kwargs": { 57 | "cc": True, 58 | }, 59 | "module_list": [], 60 | "module_purge": 
False, 61 | "module_unload_list": [], 62 | "number_node": 1, 63 | "para_deg": 1, 64 | "prepend_script": [], 65 | "queue_name": "haha", 66 | "source_list": [], 67 | "strategy": {"if_cuda_multi_devices": False, "ratio_unfinished": 0.0}, 68 | "wait_time": 0, 69 | } 70 | self.assertDictEqual(norm_dict, expected_dict) 71 | 72 | def test_task_argcheck(self): 73 | norm_dict = Task.load_from_dict( 74 | { 75 | "command": "ls", 76 | "task_work_path": "./", 77 | "forward_files": [], 78 | "backward_files": [], 79 | "outlog": "out", 80 | "errlog": "err", 81 | } 82 | ).serialize() 83 | expected_dict = { 84 | "command": "ls", 85 | "task_work_path": "./", 86 | "forward_files": [], 87 | "backward_files": [], 88 | "outlog": "out", 89 | "errlog": "err", 90 | } 91 | self.assertDictEqual(norm_dict, expected_dict) 92 | -------------------------------------------------------------------------------- /tests/test_class_job.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | # from .context import LocalSession 8 | # from .context import LocalContext 9 | from .context import ( 10 | Job, 11 | Submission, 12 | setUpModule, # noqa: F401 13 | ) 14 | from .sample_class import SampleClass 15 | 16 | 17 | class TestJob(unittest.TestCase): 18 | def setUp(self): 19 | self.job = SampleClass.get_sample_job() 20 | 21 | self.submission2 = Submission.submission_from_json("jsons/submission.json") 22 | self.job2 = self.submission2.belonging_jobs[0] 23 | 24 | def test_eq(self): 25 | self.assertTrue(self.job == self.job2) 26 | 27 | def test_get_hash(self): 28 | self.assertEqual(self.job.get_hash(), self.job2.get_hash()) 29 | # self.assertEqual(self.submission, self.submission2) 30 | 31 | def test_serialize_deserialize(self): 32 | self.assertEqual(self.job, Job.deserialize(job_dict=self.job.serialize())) 33 | 34 | def test_static_serialize(self): 35 | self.assertNotIn( 36 | "job_state", list(self.job.serialize(if_static=True).values())[0] 37 | ) 38 | self.assertNotIn("job_id", list(self.job.serialize(if_static=True).values())[0]) 39 | self.assertNotIn( 40 | "fail_count", list(self.job.serialize(if_static=True).values())[0] 41 | ) 42 | 43 | def test_get_job_state(self): 44 | pass 45 | 46 | def test_handle_unexpected_job_state(self): 47 | pass 48 | 49 | def test_register_job_id(self): 50 | pass 51 | 52 | def test_submit_job(self): 53 | pass 54 | 55 | def test_job_to_json(self): 56 | pass 57 | 58 | 59 | # def test_content_serialize(self): 60 | # self.assertEqual(self.job.content_serialize(), self.job.serialize()[self.job.job_hash]) 61 | -------------------------------------------------------------------------------- /tests/test_class_machine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | 8 | from .context import ( 9 | PBS, 10 | Machine, 11 | setUpModule, # noqa: F401 12 | ) 13 | from .sample_class import SampleClass 14 | 15 | 16 | class TestMachineInit(unittest.TestCase): 17 | def setUp(self): 18 | self.maxDiff = None 19 | 20 | def test_machine_serialize_deserialize(self): 21 | pbs = SampleClass.get_sample_pbs_local_context() 22 | self.assertEqual(pbs, Machine.deserialize(pbs.serialize())) 23 | 24 | def 
test_machine_load_from_dict(self): 25 | pbs = SampleClass.get_sample_pbs_local_context() 26 | self.assertEqual(pbs, PBS.load_from_dict(pbs.serialize())) 27 | -------------------------------------------------------------------------------- /tests/test_class_resources.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import unittest 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 7 | __package__ = "tests" 8 | # from .context import LocalSession 9 | # from .context import LocalContext 10 | from .context import ( 11 | Resources, 12 | setUpModule, # noqa: F401 13 | ) 14 | from .sample_class import SampleClass 15 | 16 | 17 | class TestResources(unittest.TestCase): 18 | def setUp(self): 19 | self.maxDiff = None 20 | self.resources = SampleClass.get_sample_resources() 21 | self.resources_dict = SampleClass.get_sample_resources_dict() 22 | 23 | def test_eq(self): 24 | self.assertEqual(self.resources, SampleClass.get_sample_resources()) 25 | 26 | def test_serialize(self): 27 | self.assertEqual(self.resources.serialize(), self.resources_dict) 28 | 29 | def test_deserialize(self): 30 | resources = Resources.deserialize(resources_dict=self.resources_dict) 31 | self.assertEqual(self.resources, resources) 32 | 33 | def test_serialize_deserialize(self): 34 | self.assertEqual( 35 | self.resources, 36 | Resources.deserialize(resources_dict=self.resources.serialize()), 37 | ) 38 | 39 | def test_resources_json(self): 40 | with open("jsons/resources.json") as f: 41 | resources_json_dict = json.load(f) 42 | self.assertTrue(resources_json_dict, self.resources_dict) 43 | self.assertTrue(resources_json_dict, self.resources.serialize()) 44 | 45 | def test_arginfo(self): 46 | self.resources.arginfo() 47 | 48 | def test_load_from_json(self): 49 | resources = Resources.load_from_json("jsons/resources.json") 50 | self.assertTrue(resources, self.resources) 51 | -------------------------------------------------------------------------------- /tests/test_class_submission.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import unittest 5 | from unittest.mock import MagicMock, patch 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 8 | __package__ = "tests" 9 | from .context import ( 10 | JobStatus, 11 | Submission, 12 | setUpModule, # noqa: F401 13 | ) 14 | from .sample_class import SampleClass 15 | 16 | 17 | class TestSubmission(unittest.TestCase): 18 | def setUp(self): 19 | self.maxDiff = None 20 | pbs = SampleClass.get_sample_pbs_local_context() 21 | self.submission = SampleClass.get_sample_submission() 22 | self.submission.bind_machine(machine=pbs) 23 | 24 | # self.submission2 = Submission.submission_from_json('jsons/submission.json') 25 | # self.submission2 = Submission.submission_from_json('jsons/submission.json') 26 | 27 | def test_serialize_deserialize(self): 28 | self.assertEqual( 29 | self.submission.serialize(), 30 | Submission.deserialize( 31 | submission_dict=self.submission.serialize() 32 | ).serialize(), 33 | ) 34 | 35 | def test_get_hash(self): 36 | pass 37 | 38 | def test_bind_machine(self): 39 | self.assertIsNotNone(self.submission.machine.context.submission) 40 | for job in self.submission.belonging_jobs: 41 | self.assertIsNotNone(job.machine) 42 | 43 | def test_get_submision_state(self): 44 | pass 45 | 46 | def 
test_handle_unexpected_submission_state(self): 47 | pass 48 | 49 | def test_submit_submission(self): 50 | pass 51 | 52 | def test_upload_jobs(self): 53 | pass 54 | 55 | def test_download_jobs(self): 56 | pass 57 | 58 | def test_submission_to_json(self): 59 | pass 60 | 61 | @patch("dpdispatcher.Submission.submission_to_json") 62 | @patch("dpdispatcher.Submission.update_submission_state") 63 | def test_check_all_finished( 64 | self, patch_update_submission_state, patch_submission_to_json 65 | ): 66 | patch_update_submission_state = MagicMock(return_value=None) 67 | patch_submission_to_json = MagicMock(return_value=None) 68 | 69 | self.submission.belonging_jobs[0].job_state = JobStatus.running 70 | self.submission.belonging_jobs[1].job_state = JobStatus.waiting 71 | self.assertFalse(self.submission.check_all_finished()) 72 | 73 | self.submission.belonging_jobs[0].job_state = JobStatus.finished 74 | self.submission.belonging_jobs[1].job_state = JobStatus.unsubmitted 75 | self.assertFalse(self.submission.check_all_finished()) 76 | 77 | self.submission.belonging_jobs[0].job_state = JobStatus.completing 78 | self.submission.belonging_jobs[1].job_state = JobStatus.finished 79 | self.assertFalse(self.submission.check_all_finished()) 80 | 81 | self.submission.belonging_jobs[0].job_state = JobStatus.finished 82 | self.submission.belonging_jobs[1].job_state = JobStatus.unknown 83 | self.assertFalse(self.submission.check_all_finished()) 84 | 85 | self.submission.belonging_jobs[0].job_state = JobStatus.finished 86 | self.submission.belonging_jobs[1].job_state = JobStatus.finished 87 | self.assertTrue(self.submission.check_all_finished()) 88 | 89 | def test_submission_from_json(self): 90 | submission2 = Submission.submission_from_json("jsons/submission.json") 91 | # print('<<<<<<<', self.submission) 92 | # print('>>>>>>>', submission2) 93 | self.assertEqual(self.submission.serialize(), submission2.serialize()) 94 | 95 | def test_submission_json(self): 96 | with open("jsons/submission.json") as f: 97 | submission_json_dict = json.load(f) 98 | self.assertTrue(submission_json_dict, self.submission.serialize()) 99 | 100 | def test_try_recover_from_json(self): 101 | pass 102 | 103 | def test_repr(self): 104 | submission_repr = repr(self.submission) 105 | j = json.dumps(self.submission.serialize(), indent=4) 106 | self.assertEqual(submission_repr, j) 107 | # self.submission_to_json() 108 | 109 | def test_clean(self): 110 | pass 111 | -------------------------------------------------------------------------------- /tests/test_class_submission_init.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | from .context import setUpModule # noqa: F401 8 | from .sample_class import SampleClass 9 | 10 | # print('in', SampleClass.get_sample_empty_submission()) 11 | 12 | 13 | class TestSubmissionInit(unittest.TestCase): 14 | def setUp(self): 15 | self.maxDiff = None 16 | # self.empty_submission = SampleClass.get_sample_empty_submission() 17 | # print('TestSubmissionInit.setUp:self.empty_submission.belonging_tasks', self.empty_submission.belonging_tasks) 18 | 19 | def test_reigister_task(self): 20 | empty_submission = SampleClass.get_sample_empty_submission() 21 | task = SampleClass.get_sample_task() 22 | # print('TestSubmissionInit.test_reigister_task:self.empty_submission.belonging_tasks', empty_submission.belonging_tasks) 
23 | empty_submission.register_task(task=task) 24 | # print('7890809', SampleClass.get_sample_empty_submission().belonging_tasks) 25 | self.assertEqual([task], empty_submission.belonging_tasks) 26 | 27 | def test_reigister_task_whether_copy(self): 28 | empty_submission = SampleClass.get_sample_empty_submission() 29 | task = SampleClass.get_sample_task() 30 | empty_submission.register_task(task=task) 31 | empty_submission2 = SampleClass.get_sample_empty_submission() 32 | self.assertEqual(empty_submission2.belonging_tasks, []) 33 | 34 | # empty_submission = 35 | 36 | # def test_reigister_task_list(self): 37 | # pass 38 | 39 | 40 | # print('out', SampleClass.get_sample_empty_submission()) 41 | # print('TestSubmissionInit.test_register_task_list:task_list', task_list) 42 | # empty_submission = SampleClass.get_sample_empty_submission() 43 | # task_list = SampleClass.get_sample_task_list() 44 | # empty_submission.register_task_list(task_list=task_list) 45 | # self.empty_submission.register_task_list(task_list=task_list) 46 | # self.assertEqual(task_list, empty_submission.belonging_tasks) 47 | 48 | # def tesk_generate_jobs(self): 49 | # task_list = SampleClass.get_sample_task_list() 50 | # self.submission.register_task_list(task_list=task_list) 51 | # self.submission.generate_jobs() 52 | # task1, task2, task3, task4 = task_list 53 | # task_ll = [job.job_task_list for job in self.submission.belonging_jobs] 54 | # self.assertEqual([[task3, task2], [task4, task1]], task_ll) 55 | -------------------------------------------------------------------------------- /tests/test_class_task.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import unittest 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 7 | __package__ = "tests" 8 | # from .context import LocalContext 9 | 10 | # from .context import Dispatcher 11 | from .context import ( 12 | Task, 13 | setUpModule, # noqa: F401 14 | ) 15 | from .sample_class import SampleClass 16 | 17 | 18 | class TestTask(unittest.TestCase): 19 | def setUp(self): 20 | self.task = SampleClass.get_sample_task() 21 | self.task_dict = SampleClass.get_sample_task_dict() 22 | 23 | def test_serialize(self): 24 | self.assertEqual(self.task.serialize(), self.task_dict) 25 | 26 | def test_deserialize(self): 27 | task = Task.deserialize(task_dict=self.task_dict) 28 | self.assertTrue(task, self.task) 29 | 30 | def test_serialize_deserialize(self): 31 | self.assertEqual(Task.deserialize(task_dict=self.task.serialize()), self.task) 32 | 33 | def test_task_json(self): 34 | with open("jsons/task.json") as f: 35 | task_json_dict = json.load(f) 36 | self.assertTrue(task_json_dict, self.task_dict) 37 | self.assertTrue(task_json_dict, self.task.serialize()) 38 | 39 | def test_repr(self): 40 | task_repr = repr(self.task) 41 | print("debug:", task_repr, self.task_dict) 42 | self.assertEqual(task_repr, str(self.task_dict)) 43 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import subprocess as sp 2 | import unittest 3 | 4 | 5 | class TestCLI(unittest.TestCase): 6 | def test_cli(self): 7 | sp.check_output(["dpdisp", "-h"]) 8 | for subcommand in ( 9 | "submission", 10 | "gui", 11 | "run", 12 | ): 13 | sp.check_output(["dpdisp", subcommand, "-h"]) 14 | -------------------------------------------------------------------------------- 
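
The serialize/deserialize round trips exercised by the class tests above can be reproduced directly on the JSON fixtures. A minimal sketch, run from the tests/ directory and using only calls shown in tests/test_class_task.py, tests/test_class_resources.py, and tests/test_group_size.py:

import json

from dpdispatcher import Resources, Task

with open("jsons/task.json") as f:
    task = Task.load_from_dict(json.load(f))
# Task defines equality, so a serialize/deserialize round trip can be asserted
assert Task.deserialize(task_dict=task.serialize()) == task

resources = Resources.load_from_json("jsons/resources.json")
assert Resources.deserialize(resources_dict=resources.serialize()) == resources
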
/tests/test_context_dir/0_md/bct-1/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-1/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-1/some_dir/some_file: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/tests/test_context_dir/0_md/bct-1/some_dir/some_file -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-2/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-2/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-3/conf.lmp: 
-------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-3/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-4/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/bct-4/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/dir with space/file with space: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/tests/test_context_dir/0_md/dir with space/file with space -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/graph.pb: 
-------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_context_dir/0_md/some_dir/some_file: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/tests/test_context_dir/0_md/some_dir/some_file -------------------------------------------------------------------------------- /tests/test_examples.py: -------------------------------------------------------------------------------- 1 | """This module ensures input in the examples directory 2 | could pass the argument checking. 3 | """ 4 | 5 | import json 6 | import unittest 7 | from pathlib import Path 8 | from typing import Sequence, Tuple 9 | 10 | from dargs import Argument 11 | 12 | from dpdispatcher.arginfo import machine_dargs, resources_dargs, task_dargs 13 | 14 | # directory of examples 15 | p_examples = Path(__file__).parent.parent / "examples" 16 | 17 | machine_args = machine_dargs() 18 | resources_args = resources_dargs(detail_kwargs=False) 19 | task_args = task_dargs() 20 | 21 | # input_files : tuple[tuple[Argument, Path]] 22 | # tuple of example list 23 | input_files: Sequence[Tuple[Argument, Path]] = ( 24 | (machine_args, p_examples / "machine" / "expanse.json"), 25 | (machine_args, p_examples / "machine" / "lazy_local.json"), 26 | (machine_args, p_examples / "machine" / "mandu.json"), 27 | (machine_args, p_examples / "machine" / "ssh_proxy_command.json"), 28 | (resources_args, p_examples / "resources" / "expanse_cpu.json"), 29 | (resources_args, p_examples / "resources" / "mandu.json"), 30 | (resources_args, p_examples / "resources" / "tiger.json"), 31 | (task_args, p_examples / "task" / "deepmd-kit.json"), 32 | (task_args, p_examples / "task" / "g16.json"), 33 | ) 34 | 35 | 36 | class TestExamples(unittest.TestCase): 37 | def test_arguments(self): 38 | for arginfo, fn in input_files: 39 | fn = str(fn) 40 | with self.subTest(fn=fn): 41 | with open(fn) as f: 42 | data = json.load(f) 43 | arginfo.check_value(data, strict=True) 44 | -------------------------------------------------------------------------------- /tests/test_group_size.py: -------------------------------------------------------------------------------- 1 | """Test `Submission.generate_jobs` with different group size.""" 2 | 3 | import json 4 | import os 5 | import sys 6 | from pathlib import Path 7 | from unittest import TestCase 8 | 9 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 10 | __package__ = "tests" 11 | from .context import ( 12 | Machine, 13 | Resources, 14 | Submission, 15 | Task, 16 | setUpModule, # noqa: F401 17 | ) 18 | 19 | # 99 tasks in total 20 | # group_size - expected_ntasks 21 | group_ntasks_pairs = [ 22 | (1, 99), 23 | (3, 33), 24 | (10, 10), 25 | (100, 1), 26 | (0, 1), 27 | ] 28 | 29 | cwd = Path(__file__).parent 30 | with open(cwd / "jsons" / "machine.json") as f: 31 | j_machine = json.load(f)["machine"] 32 | with open(cwd / "jsons" / "resources.json") as f: 33 | j_resources = json.load(f) 34 | with open(cwd / "jsons" / "task.json") as f: 35 | j_task = json.load(f) 36 | 37 | 38 | class TestGroupSize(TestCase): 39 | def test_works_as_expected(self): 40 | for group_size, ntasks in group_ntasks_pairs: 41 | with self.subTest(group_size): 42 | machine = Machine.load_from_dict(j_machine) 43 | j_resources["group_size"] = group_size 44 | resources = 
Resources.load_from_dict(j_resources) 45 | tasks = [Task.load_from_dict(j_task) for _ in range(99)] 46 | submission = Submission(".", machine, resources, task_list=tasks) 47 | submission.generate_jobs() 48 | self.assertEqual(len(submission.belonging_jobs), ntasks) 49 | -------------------------------------------------------------------------------- /tests/test_gui.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: LGPL-3.0-or-later 2 | import unittest 3 | 4 | from dpgui import ( 5 | generate_dpgui_templates, 6 | ) 7 | 8 | 9 | class TestDPGUI(unittest.TestCase): 10 | def test_dpgui_entrypoints(self): 11 | self.assertTrue(len(generate_dpgui_templates()) > 0) 12 | -------------------------------------------------------------------------------- /tests/test_hdfs_context.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import sys 5 | import tarfile 6 | import unittest 7 | from glob import glob 8 | 9 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 10 | __package__ = "tests" 11 | 12 | from .context import ( 13 | HDFS, 14 | HDFSContext, 15 | Machine, 16 | setUpModule, # noqa: F401 17 | ) 18 | from .sample_class import SampleClass 19 | 20 | 21 | @unittest.skipIf(not shutil.which("hadoop"), "requires hadoop") 22 | class TestHDFSContext(unittest.TestCase): 23 | @classmethod 24 | def setUpClass(cls): 25 | with open("jsons/machine_yarn.json") as f: 26 | mdata = json.load(f) 27 | cls.machine = Machine.load_from_dict(mdata["machine"]) 28 | cls.submission = SampleClass.get_sample_submission() 29 | cls.submission.bind_machine(cls.machine) 30 | cls.submission_hash = cls.submission.submission_hash 31 | 32 | def setUp(self): 33 | self.context = self.__class__.machine.context 34 | 35 | def test_0_hdfs_context(self): 36 | self.assertIsInstance(self.context, HDFSContext) 37 | 38 | def test_1_upload(self): 39 | self.context.upload(self.__class__.submission) 40 | 41 | def test_2_fake_run(self): 42 | rfile_tgz = ( 43 | self.context.remote_root 44 | + "/" 45 | + self.context.submission.submission_hash 46 | + "_upload.tgz" 47 | ) 48 | tmp_dir = "./tmp_fake_run" 49 | if os.path.exists(tmp_dir): 50 | shutil.rmtree(tmp_dir) 51 | os.mkdir(tmp_dir) 52 | self.assertTrue(HDFS.copy_to_local(rfile_tgz, tmp_dir)) 53 | 54 | cwd = os.getcwd() 55 | os.chdir(tmp_dir) 56 | tgz_file_list = glob("*_upload.tgz") 57 | for tgz in tgz_file_list: 58 | with tarfile.open(tgz, "r:gz") as tar: 59 | tar.extractall() 60 | os.remove(tgz) 61 | 62 | file_list = [ 63 | "bct-1/log.lammps", 64 | "bct-2/log.lammps", 65 | "bct-3/log.lammps", 66 | "bct-4/log.lammps", 67 | ] 68 | for fname in file_list: 69 | with open(fname, "w") as fp: 70 | fp.write("# mock log") 71 | 72 | file_list = glob("./*") 73 | download_tgz = self.context.submission.submission_hash + "_1_download.tar.gz" 74 | with tarfile.open(download_tgz, "w:gz", dereference=True) as tar: 75 | for ii in file_list: 76 | tar.add(ii) 77 | ret, _ = HDFS.copy_from_local(download_tgz, self.context.remote_root) 78 | self.assertTrue(ret) 79 | os.chdir(cwd) 80 | shutil.rmtree(tmp_dir) 81 | 82 | def test_3_download(self): 83 | self.context.download(self.__class__.submission) 84 | file_list = [ 85 | "bct-1/log.lammps", 86 | "bct-2/log.lammps", 87 | "bct-3/log.lammps", 88 | "bct-4/log.lammps", 89 | ] 90 | for fname in file_list: 91 | self.assertTrue( 92 | os.path.isfile(os.path.join(self.context.local_root, fname)) 93 | 
) 94 | os.remove(os.path.join(self.context.local_root, fname)) 95 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-1/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-1/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-2/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-2/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-3/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 
0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-3/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-4/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/bct-4/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_hdfs_dir/0_md/graph.pb: -------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_if_cuda_multi_devices/test_dir/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/tests/test_if_cuda_multi_devices/test_dir/test.txt -------------------------------------------------------------------------------- /tests/test_import_classes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | 
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | from .context import ( 8 | dpdispatcher, 9 | setUpModule, # noqa: F401 10 | ) 11 | 12 | 13 | class TestImportClasses(unittest.TestCase): 14 | def setUp(self): 15 | self.maxDiff = None 16 | 17 | def test_import_class_Machine(self): 18 | from dpdispatcher import Machine 19 | 20 | self.assertEqual(dpdispatcher.machine.Machine, Machine) 21 | 22 | def test_import_class_Resources(self): 23 | from dpdispatcher import Resources 24 | 25 | self.assertEqual(dpdispatcher.submission.Resources, Resources) 26 | 27 | def test_import_class_Submission(self): 28 | from dpdispatcher import Submission 29 | 30 | self.assertEqual(dpdispatcher.submission.Submission, Submission) 31 | 32 | def test_import_class_Task(self): 33 | from dpdispatcher import Task 34 | 35 | self.assertEqual(dpdispatcher.submission.Task, Task) 36 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-1/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-1/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-2/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-2/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | 
thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-3/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-3/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-4/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/bct-4/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type 
xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_jh_unischeduler/0_md/graph.pb: -------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_lazy_local_context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | import unittest 5 | from unittest.mock import MagicMock 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 8 | __package__ = "tests" 9 | from .context import ( 10 | LazyLocalContext, 11 | setUpModule, # noqa: F401 12 | ) 13 | 14 | 15 | class TestLazyLocalContext(unittest.TestCase): 16 | def setUp(self): 17 | # os.makedirs('loc', exist_ok = True) 18 | # os.makedirs('loc/task0', exist_ok = True) 19 | # os.makedirs('loc/task1', exist_ok = True) 20 | shutil.copytree(src="test_context_dir/", dst="tmp_lazy_local_context_dir/") 21 | 22 | self.lazy_local_context = LazyLocalContext( 23 | local_root="tmp_lazy_local_context_dir/" 24 | ) 25 | submission = MagicMock(work_base="0_md/") 26 | self.lazy_local_context.bind_submission(submission) 27 | 28 | def tearDown(self): 29 | shutil.rmtree("tmp_lazy_local_context_dir/") 30 | 31 | def test_upload(self): 32 | pass 33 | 34 | def test_download(self): 35 | pass 36 | 37 | # TODO: support other platforms 38 | @unittest.skipIf(sys.platform != "linux", "not linux") 39 | def test_block_call(self): 40 | code, stdin, stdout, stderr = self.lazy_local_context.block_call("ls") 41 | self.assertEqual( 42 | stdout.readlines(), 43 | [ 44 | "bct-1\n", 45 | "bct-2\n", 46 | "bct-3\n", 47 | "bct-4\n", 48 | "dir with space\n", 49 | "graph.pb\n", 50 | "some_dir\n", 51 | ], 52 | ) 53 | self.assertEqual(code, 0) 54 | 55 | code, stdin, stdout, stderr = self.lazy_local_context.block_call("ls a") 56 | self.assertEqual(code, 2) 57 | # self.assertEqual(stderr.read().decode('utf-8'), "ls: cannot access 'a': No such file or directory\n") 58 | err_msg = stderr.read().decode("utf-8") 59 | self.assertTrue("ls: cannot access" in err_msg) 60 | self.assertTrue("No such file or directory\n" in err_msg) 61 | 62 | # def test_block_checkcall(self) : 63 | # self.job = LazyLocalContext('loc', None) 64 | # tasks = ['task0', 'task1'] 65 | # files = ['test0', 'test1'] 66 | # self.job.upload(tasks, files) 67 | # # ls 68 | # stdin, stdout, stderr = self.job.block_checkcall('ls') 69 | # self.assertEqual(stdout.read().decode('utf-8'), 'task0\ntask1\n') 70 | # self.assertEqual(stdout.readlines(), ['task0\n','task1\n']) 71 | # with self.assertRaises(RuntimeError): 72 | # stdin, stdout, stderr = self.job.block_checkcall('ls a') 73 | 74 | # def test_file(self) : 75 | # self.job = LazyLocalContext('loc', None) 76 | # self.assertFalse(self.job.check_file_exists('aaa')) 77 | # tmp = str(uuid.uuid4()) 78 | # self.job.write_file('aaa', tmp) 79 | # self.assertTrue(self.job.check_file_exists('aaa')) 80 | # tmp1 = self.job.read_file('aaa') 81 | # self.assertEqual(tmp, tmp1) 82 | 83 | # def test_call(self) : 84 | # self.job = LazyLocalContext('loc', None) 85 | # proc = self.job.call('sleep 3') 86 | # self.assertFalse(self.job.check_finish(proc)) 87 | # time.sleep(1) 88 | # self.assertFalse(self.job.check_finish(proc)) 89 | # time.sleep(2.5) 90 | # self.assertTrue(self.job.check_finish(proc)) 91 | # r,o,e=self.job.get_return(proc) 92 | # 
self.assertEqual(r, 0) 93 | # self.assertEqual(o.read(), b'') 94 | # self.assertEqual(e.read(), b'') 95 | # r,o,e=self.job.get_return(proc) 96 | # self.assertEqual(r, 0) 97 | # self.assertEqual(o, None) 98 | # self.assertEqual(e, None) 99 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-1/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-1/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-2/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-2/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-3/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 
3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-3/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-4/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/bct-4/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/graph.pb: -------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_lsf_dir/0_md/submission.json: -------------------------------------------------------------------------------- 1 | { 2 | "work_base": "0_md", 3 | "resources": { 4 | "number_node": 1, 5 | "cpu_per_node": 4, 6 | "gpu_per_node": 1, 7 | "queue_name": "V100_8_32", 8 | "group_size": 4, 9 | "if_cuda_multi_devices": true 10 | }, 11 | "forward_common_files": [ 12 | "graph.pb" 
13 | ], 14 | "backward_common_files": [ 15 | "submission.json" 16 | ], 17 | "belonging_jobs": [ 18 | { 19 | "89936e3ac869b3132977da5cc4187725f3318ea3": { 20 | "job_task_list": [ 21 | { 22 | "command": "lmp_serial -i input.lammps", 23 | "task_work_path": "bct-3", 24 | "forward_files": [ 25 | "conf.lmp", 26 | "input.lammps" 27 | ], 28 | "backward_files": [ 29 | "log.lammps" 30 | ], 31 | "outlog": "log", 32 | "errlog": "err", 33 | "task_need_resources": 0.25 34 | }, 35 | { 36 | "command": "lmp_serial -i input.lammps", 37 | "task_work_path": "bct-2", 38 | "forward_files": [ 39 | "conf.lmp", 40 | "input.lammps" 41 | ], 42 | "backward_files": [ 43 | "log.lammps" 44 | ], 45 | "outlog": "log", 46 | "errlog": "err", 47 | "task_need_resources": 0.25 48 | }, 49 | { 50 | "command": "lmp_serial -i input.lammps", 51 | "task_work_path": "bct-4", 52 | "forward_files": [ 53 | "conf.lmp", 54 | "input.lammps" 55 | ], 56 | "backward_files": [ 57 | "log.lammps" 58 | ], 59 | "outlog": "log", 60 | "errlog": "err", 61 | "task_need_resources": 0.5 62 | }, 63 | { 64 | "command": "lmp_serial -i input.lammps", 65 | "task_work_path": "bct-1", 66 | "forward_files": [ 67 | "conf.lmp", 68 | "input.lammps" 69 | ], 70 | "backward_files": [ 71 | "log.lammps" 72 | ], 73 | "outlog": "log", 74 | "errlog": "err", 75 | "task_need_resources": 1 76 | } 77 | ], 78 | "resources": { 79 | "number_node": 1, 80 | "cpu_per_node": 4, 81 | "gpu_per_node": 1, 82 | "queue_name": "V100_8_32", 83 | "group_size": 4, 84 | "if_cuda_multi_devices": true 85 | }, 86 | "job_state": 5, 87 | "job_id": "21463.scheduler", 88 | "fail_count": 1 89 | } 90 | } 91 | ] 92 | } -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-1/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-1/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-2/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 
0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-2/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-3/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-3/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-4/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/bct-4/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # 
box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_pbs_dir/0_md/graph.pb: -------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_retry.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | from .context import ( 8 | RetrySignal, 9 | retry, 10 | setUpModule, # noqa: F401 11 | ) 12 | 13 | 14 | class TestRetry(unittest.TestCase): 15 | def test_retry_fail(self): 16 | """Always retry.""" 17 | 18 | @retry(max_retry=3, sleep=0.05, catch_exception=RetrySignal) 19 | def some_method(): 20 | raise RetrySignal("Failed to do something") 21 | 22 | with self.assertRaises(RuntimeError): 23 | some_method() 24 | 25 | def test_retry_success(self): 26 | """Retry less than 3 times.""" 27 | retry_times = [0] 28 | 29 | @retry(max_retry=3, sleep=0.05, catch_exception=RetrySignal) 30 | def some_method(retry_times): 31 | if retry_times[0] < 2: 32 | retry_times[0] += 1 33 | raise RetrySignal("Failed to do something") 34 | 35 | some_method(retry_times) 36 | -------------------------------------------------------------------------------- /tests/test_rsync_flags.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import unittest 4 | from unittest.mock import patch 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 7 | __package__ = "tests" 8 | 9 | from dpdispatcher.utils.utils import rsync 10 | 11 | 12 | class TestRsyncFlags(unittest.TestCase): 13 | """Test rsync function flags to ensure correct options are used.""" 14 | 15 | @patch("dpdispatcher.utils.utils.run_cmd_with_all_output") 16 | def test_rsync_flags_exclude_owner_group(self, mock_run_cmd): 17 | """Test that rsync uses flags that exclude owner and group preservation.""" 18 | # Mock successful command execution 19 | mock_run_cmd.return_value = (0, "", "") 20 | 21 | # Call rsync function 22 | rsync("source_file", "dest_file", key_filename="test_key") 23 | 24 | # Verify the command was called 25 | mock_run_cmd.assert_called_once() 26 | 27 | # Get the command that was executed 28 | called_cmd = mock_run_cmd.call_args[0][0] 29 | 30 | # Verify the command contains the correct flags 31 | self.assertIn("-rlptDz", called_cmd) 32 | self.assertNotIn("-az", called_cmd) 33 | 34 | # Verify rsync command structure 35 | self.assertIn("rsync", called_cmd) 36 | self.assertIn("source_file", called_cmd) 37 | self.assertIn("dest_file", called_cmd) 38 | self.assertIn("-e", called_cmd) 39 | 
self.assertIn("-q", called_cmd) 40 | 41 | @patch("dpdispatcher.utils.utils.run_cmd_with_all_output") 42 | def test_rsync_with_proxy_command_flags(self, mock_run_cmd): 43 | """Test that rsync uses correct flags even with proxy command.""" 44 | # Mock successful command execution 45 | mock_run_cmd.return_value = (0, "", "") 46 | 47 | # Call rsync function with proxy command 48 | rsync( 49 | "source_file", 50 | "dest_file", 51 | key_filename="test_key", 52 | proxy_command="ssh -W target:22 jump_host", 53 | ) 54 | 55 | # Verify the command was called 56 | mock_run_cmd.assert_called_once() 57 | 58 | # Get the command that was executed 59 | called_cmd = mock_run_cmd.call_args[0][0] 60 | 61 | # Verify the command contains the correct flags 62 | self.assertIn("-rlptDz", called_cmd) 63 | self.assertNotIn("-az", called_cmd) 64 | 65 | @patch("dpdispatcher.utils.utils.run_cmd_with_all_output") 66 | def test_rsync_error_handling(self, mock_run_cmd): 67 | """Test that rsync properly handles errors.""" 68 | # Mock failed command execution 69 | mock_run_cmd.return_value = ( 70 | 23, 71 | "", 72 | "rsync: chown failed: Operation not permitted", 73 | ) 74 | 75 | # Call rsync function and expect RuntimeError 76 | with self.assertRaises(RuntimeError) as context: 77 | rsync("source_file", "dest_file") 78 | 79 | # Verify error message contains the command and error 80 | self.assertIn("Failed to run", str(context.exception)) 81 | self.assertIn( 82 | "rsync: chown failed: Operation not permitted", str(context.exception) 83 | ) 84 | 85 | 86 | if __name__ == "__main__": 87 | unittest.main() 88 | -------------------------------------------------------------------------------- /tests/test_run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tempfile 4 | import unittest 5 | from pathlib import Path 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 8 | __package__ = "tests" 9 | 10 | from .context import run 11 | 12 | 13 | class TestRun(unittest.TestCase): 14 | def test_run(self): 15 | this_dir = Path(__file__).parent 16 | cwd = os.getcwd() 17 | with tempfile.TemporaryDirectory() as temp_dir: 18 | try: 19 | os.chdir(temp_dir) 20 | run(filename=str(this_dir / "hello_world.py")) 21 | self.assertEqual( 22 | (Path(temp_dir) / "log").read_text().strip(), "hello world!" 
23 | ) 24 | finally: 25 | os.chdir(cwd) 26 | -------------------------------------------------------------------------------- /tests/test_run_submission_bohrium.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import textwrap 4 | import unittest 5 | from pathlib import Path 6 | 7 | sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) 8 | 9 | from test_run_submission import RunSubmission 10 | 11 | 12 | @unittest.skipIf( 13 | os.environ.get("DPDISPATCHER_TEST") != "bohrium", 14 | "outside the Bohrium testing environment", 15 | ) 16 | class TestBohriumRun(RunSubmission, unittest.TestCase): 17 | def setUp(self): 18 | super().setUp() 19 | self.machine_dict.update( 20 | batch_type="Bohrium", 21 | context_type="Bohrium", 22 | remote_profile={ 23 | "email": os.environ["BOHRIUM_EMAIL"], 24 | "password": os.environ["BOHRIUM_PASSWORD"], 25 | "project_id": int(os.environ["BOHRIUM_PROJECT_ID"]), 26 | "input_data": { 27 | "job_type": "indicate", 28 | "log_file": "log", 29 | "job_name": "dpdispather_test", 30 | "disk_size": 20, 31 | "scass_type": "c2_m4_cpu", 32 | "platform": "ali", 33 | "image_name": "registry.dp.tech/dptech/ubuntu:22.04-py3.10", 34 | "on_demand": 0, 35 | }, 36 | }, 37 | ) 38 | 39 | @unittest.skip("Manaually skip") # comment this line to open unittest 40 | def test_async_run_submission(self): 41 | return super().test_async_run_submission() 42 | 43 | 44 | @unittest.skipIf( 45 | os.environ.get("DPDISPATCHER_TEST") != "bohrium", 46 | "outside the Bohrium testing environment", 47 | ) 48 | class TestOpenAPIRun(RunSubmission, unittest.TestCase): 49 | def setUp(self): 50 | super().setUp() 51 | bohrium_config = textwrap.dedent( 52 | """\ 53 | [Credentials] 54 | accessKey={accesskey} 55 | """ 56 | ).format(accesskey=os.environ["BOHRIUM_ACCESS_KEY"]) 57 | Path.home().joinpath(".brmconfig").write_text(bohrium_config) 58 | self.machine_dict.update( 59 | batch_type="OpenAPI", 60 | context_type="OpenAPI", 61 | remote_profile={ 62 | "project_id": int(os.environ["BOHRIUM_PROJECT_ID"]), 63 | "machine_type": "c2_m4_cpu", 64 | "platform": "ali", 65 | "image_address": "registry.dp.tech/dptech/ubuntu:22.04-py3.10", 66 | "job_name": "dpdispather_test", 67 | }, 68 | ) 69 | 70 | @unittest.skip("Manaually skip") # comment this line to open unittest 71 | def test_async_run_submission(self): 72 | return super().test_async_run_submission() 73 | -------------------------------------------------------------------------------- /tests/test_shell_cuda_multi_devices.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import sys 5 | 6 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 7 | __package__ = "tests" 8 | import unittest 9 | 10 | from .context import ( 11 | Machine, 12 | Resources, 13 | Submission, 14 | Task, 15 | get_file_md5, 16 | setUpModule, # noqa: F401 17 | ) 18 | 19 | 20 | @unittest.skipIf(sys.platform == "win32", "Shell is not supported on Windows") 21 | class TestShellCudaMultiDevices(unittest.TestCase): 22 | def setUp(self): 23 | self.maxDiff = None 24 | 25 | def test_shell_cuda_multi_devices(self): 26 | with open("jsons/machine_if_cuda_multi_devices.json") as f: 27 | machine_dict = json.load(f) 28 | machine = Machine.load_from_dict(machine_dict["machine"]) 29 | resources = Resources.load_from_dict(machine_dict["resources"]) 30 | 31 | task_list = [] 32 | for ii in range(16): 33 | task = Task( 34 | 
command=f"echo dpdispatcher_unittest_{ii}", 35 | task_work_path="./", 36 | forward_files=[], 37 | backward_files=[], 38 | outlog="out.txt", 39 | ) 40 | task_list.append(task) 41 | 42 | submission = Submission( 43 | work_base="test_dir/", 44 | machine=machine, 45 | resources=resources, 46 | forward_common_files=["test.txt"], 47 | backward_common_files=["out.txt"], 48 | task_list=task_list, 49 | ) 50 | submission.run_submission(clean=False) 51 | 52 | for ii in ["test.txt"]: 53 | f1 = os.path.join("test_if_cuda_multi_devices/", "test_dir/", ii) 54 | f2 = os.path.join( 55 | "tmp_if_cuda_multi_devices/", submission.submission_hash, ii 56 | ) 57 | self.assertEqual(get_file_md5(f1), get_file_md5(f2)) 58 | 59 | self.assertTrue(os.path.isfile("test_if_cuda_multi_devices/test_dir/out.txt")) 60 | 61 | @classmethod 62 | def tearDownClass(cls): 63 | shutil.rmtree("tmp_if_cuda_multi_devices/") 64 | # pass 65 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/fail_dir/mock_fail_task.txt: -------------------------------------------------------------------------------- 1 | # mock file for unittest; test when dpdispatcher meets fail task 2 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/parent_dir/dir with space/example.txt: -------------------------------------------------------------------------------- 1 | dir with space 2 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/parent_dir/dir1/example.txt: -------------------------------------------------------------------------------- 1 | # example1.txt 2 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/parent_dir/dir2/example.txt: -------------------------------------------------------------------------------- 1 | # example2.txt 2 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/parent_dir/dir3/example.txt: -------------------------------------------------------------------------------- 1 | # example3.txt 2 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/parent_dir/dir4/example.txt: -------------------------------------------------------------------------------- 1 | # example4.txt 2 | -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/parent_dir/graph.pb: -------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_shell_trival_dir/recover_dir/mock_recover_task.txt: -------------------------------------------------------------------------------- 1 | # mock file for unittest; test when dpdispatcher need recover tasks 2 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-1/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | 
-------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-1/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-2/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-2/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-3/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-3/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | 
compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-4/conf.lmp: -------------------------------------------------------------------------------- 1 | 2 | 2 atoms 3 | 1 atom types 4 | 0.0000000000 4.0000000000 xlo xhi 5 | 0.0000000000 4.0000000000 ylo yhi 6 | 0.0000000000 3.3800000000 zlo zhi 7 | 0.0000000000 0.0000000000 0.0000000000 xy xz yz 8 | 9 | Atoms # atomic 10 | 11 | 1 1 0.0000000000 0.0000000000 0.0000000000 12 | 2 1 2.0000000000 2.0000000000 1.6900000000 13 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/bct-4/input.lammps: -------------------------------------------------------------------------------- 1 | clear 2 | units metal 3 | dimension 3 4 | boundary p p p 5 | atom_style atomic 6 | # box tilt large 7 | read_data conf.lmp 8 | mass 1 118.71 9 | neigh_modify every 1 delay 0 check no 10 | pair_style deepmd ../graph.pb 11 | pair_coeff 12 | compute mype all pe 13 | compute mymsd all msd 14 | 15 | thermo 20 16 | thermo_style custom step temp pe pxx pyy pzz pxy pxz pyz lx ly lz vol c_mymsd[*] spcpu 17 | min_style cg 18 | fix 1 all box/relax iso 0.0 19 | minimize 1.000000e-12 1.000000e-06 5000 500000 20 | 21 | # timestep 0.002 22 | # velocity all create 2.0 7369221 23 | 24 | # fix 2 all npt temp 2.0 200.0 $(100.0*dt) aniso 0.0 200000.0 $(1000.0*dt) 25 | # run 2000 26 | # unfix 2 27 | 28 | dump 1 all custom 1 final.dump.relax id type xs ys zs fx fy fz 29 | run 10000 30 | 31 | write_data out.lmp 32 | -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/d3c842c5b9476e48f7145b370cd330372b9293e1.json: -------------------------------------------------------------------------------- 1 | { 2 | "work_base": "0_md", 3 | "resources": { 4 | "resources": { 5 | "number_node": 1, 6 | "cpu_per_node": 4, 7 | "gpu_per_node": 0, 8 | "queue_name": "debug", 9 | "group_size": 4, 10 | "if_cuda_multi_devices": false 11 | }, 12 | "slurm_sbatch_dict": { 13 | "mem": "10G", 14 | "cpus_per_task": 1, 15 | "time": "120:0:0" 16 | } 17 | }, 18 | "forward_common_files": [ 19 | "graph.pb" 20 | ], 21 | "backward_common_files": [ 22 | "*.json" 23 | ], 24 | "belonging_jobs": [ 25 | { 26 | "8cda6723de155874106d96a543e1872c9fc9aa1d": { 27 | "job_task_list": [ 28 | { 29 | "command": "/home/dp/deepmd-kit/bin/lmp -i input.lammps", 30 | "task_work_path": "bct-3", 31 | "forward_files": [ 32 | "conf.lmp", 33 | "input.lammps" 34 | ], 35 | "backward_files": [ 36 | "log.lammps" 37 | ], 38 | "outlog": "log", 39 | "errlog": "err", 40 | "task_need_resources": 0.25 41 | }, 42 | { 43 | "command": "/home/dp/deepmd-kit/bin/lmp -i input.lammps", 44 | "task_work_path": "bct-2", 45 | "forward_files": [ 46 | "conf.lmp", 47 | "input.lammps" 48 | ], 49 | "backward_files": [ 50 | "log.lammps" 51 | ], 52 | "outlog": "log", 53 | "errlog": "err", 54 | "task_need_resources": 0.25 55 | }, 56 | { 57 | "command": 
"/home/dp/deepmd-kit/bin/lmp -i input.lammps", 58 | "task_work_path": "bct-4", 59 | "forward_files": [ 60 | "conf.lmp", 61 | "input.lammps" 62 | ], 63 | "backward_files": [ 64 | "log.lammps" 65 | ], 66 | "outlog": "log", 67 | "errlog": "err", 68 | "task_need_resources": 0.5 69 | }, 70 | { 71 | "command": "/home/dp/deepmd-kit/bin/lmp -i input.lammps", 72 | "task_work_path": "bct-1", 73 | "forward_files": [ 74 | "conf.lmp", 75 | "input.lammps" 76 | ], 77 | "backward_files": [ 78 | "log.lammps" 79 | ], 80 | "outlog": "log", 81 | "errlog": "err", 82 | "task_need_resources": 1 83 | } 84 | ], 85 | "resources": { 86 | "resources": { 87 | "number_node": 1, 88 | "cpu_per_node": 4, 89 | "gpu_per_node": 0, 90 | "queue_name": "debug", 91 | "group_size": 4, 92 | "if_cuda_multi_devices": false 93 | }, 94 | "slurm_sbatch_dict": { 95 | "mem": "10G", 96 | "cpus_per_task": 1, 97 | "time": "120:0:0" 98 | } 99 | }, 100 | "job_state": 5, 101 | "job_id": "20", 102 | "fail_count": 1 103 | } 104 | } 105 | ] 106 | } -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/graph.pb: -------------------------------------------------------------------------------- 1 | ../../graph.pb -------------------------------------------------------------------------------- /tests/test_slurm_dir/0_md/submission.json: -------------------------------------------------------------------------------- 1 | { 2 | "work_base": "0_md", 3 | "resources": { 4 | "number_node": 1, 5 | "cpu_per_node": 4, 6 | "gpu_per_node": 1, 7 | "queue_name": "V100_8_32", 8 | "group_size": 4, 9 | "if_cuda_multi_devices": true 10 | }, 11 | "forward_common_files": [ 12 | "graph.pb" 13 | ], 14 | "backward_common_files": [ 15 | "submission.json" 16 | ], 17 | "belonging_jobs": [ 18 | { 19 | "89936e3ac869b3132977da5cc4187725f3318ea3": { 20 | "job_task_list": [ 21 | { 22 | "command": "lmp_serial -i input.lammps", 23 | "task_work_path": "bct-3", 24 | "forward_files": [ 25 | "conf.lmp", 26 | "input.lammps" 27 | ], 28 | "backward_files": [ 29 | "log.lammps" 30 | ], 31 | "outlog": "log", 32 | "errlog": "err", 33 | "task_need_resources": 0.25 34 | }, 35 | { 36 | "command": "lmp_serial -i input.lammps", 37 | "task_work_path": "bct-2", 38 | "forward_files": [ 39 | "conf.lmp", 40 | "input.lammps" 41 | ], 42 | "backward_files": [ 43 | "log.lammps" 44 | ], 45 | "outlog": "log", 46 | "errlog": "err", 47 | "task_need_resources": 0.25 48 | }, 49 | { 50 | "command": "lmp_serial -i input.lammps", 51 | "task_work_path": "bct-4", 52 | "forward_files": [ 53 | "conf.lmp", 54 | "input.lammps" 55 | ], 56 | "backward_files": [ 57 | "log.lammps" 58 | ], 59 | "outlog": "log", 60 | "errlog": "err", 61 | "task_need_resources": 0.5 62 | }, 63 | { 64 | "command": "lmp_serial -i input.lammps", 65 | "task_work_path": "bct-1", 66 | "forward_files": [ 67 | "conf.lmp", 68 | "input.lammps" 69 | ], 70 | "backward_files": [ 71 | "log.lammps" 72 | ], 73 | "outlog": "log", 74 | "errlog": "err", 75 | "task_need_resources": 1 76 | } 77 | ], 78 | "resources": { 79 | "number_node": 1, 80 | "cpu_per_node": 4, 81 | "gpu_per_node": 1, 82 | "queue_name": "V100_8_32", 83 | "group_size": 4, 84 | "if_cuda_multi_devices": true 85 | }, 86 | "job_state": 5, 87 | "job_id": "21463.scheduler", 88 | "fail_count": 1 89 | } 90 | } 91 | ] 92 | } -------------------------------------------------------------------------------- /tests/test_ssh_jump_host.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import 
unittest 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 6 | __package__ = "tests" 7 | from .context import ( 8 | SSHSession, 9 | setUpModule, # noqa: F401 10 | ) 11 | 12 | 13 | @unittest.skipIf( 14 | os.environ.get("DPDISPATCHER_TEST") != "ssh", "outside the ssh testing environment" 15 | ) 16 | class TestSSHJumpHost(unittest.TestCase): 17 | """Test SSH jump host functionality.""" 18 | 19 | def test_proxy_command_connection(self): 20 | """Test SSH connection using proxy_command via jump host.""" 21 | # Test connection from test -> server via jumphost 22 | ssh_session = SSHSession( 23 | hostname="server", 24 | username="root", 25 | key_filename="/root/.ssh/id_rsa", 26 | proxy_command="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /root/.ssh/id_rsa -W server:22 root@jumphost", 27 | ) 28 | 29 | # Verify the connection was established 30 | self.assertIsNotNone(ssh_session.ssh) 31 | self.assertTrue(ssh_session._check_alive()) 32 | 33 | # Test running a simple command through the proxy 34 | assert ssh_session.ssh is not None # for type checker 35 | stdin, stdout, stderr = ssh_session.ssh.exec_command("echo 'test via proxy'") 36 | output = stdout.read().decode().strip() 37 | self.assertEqual(output, "test via proxy") 38 | 39 | # Verify proxy_command attribute is set correctly 40 | self.assertEqual( 41 | ssh_session.proxy_command, 42 | "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i /root/.ssh/id_rsa -W server:22 root@jumphost", 43 | ) 44 | 45 | ssh_session.close() 46 | 47 | def test_direct_connection_no_proxy(self): 48 | """Test direct SSH connection without proxy command.""" 49 | # Test direct connection from test -> server (no proxy) 50 | ssh_session = SSHSession( 51 | hostname="server", username="root", key_filename="/root/.ssh/id_rsa" 52 | ) 53 | 54 | # Verify the connection was established 55 | self.assertIsNotNone(ssh_session.ssh) 56 | self.assertTrue(ssh_session._check_alive()) 57 | 58 | # Test running a simple command 59 | assert ssh_session.ssh is not None # for type checker 60 | stdin, stdout, stderr = ssh_session.ssh.exec_command("echo 'test direct'") 61 | output = stdout.read().decode().strip() 62 | self.assertEqual(output, "test direct") 63 | 64 | # Verify no proxy_command is set 65 | self.assertIsNone(ssh_session.proxy_command) 66 | 67 | ssh_session.close() 68 | 69 | def test_jump_host_direct_connection(self): 70 | """Test direct connection to jump host itself.""" 71 | # Test direct connection from test -> jumphost 72 | ssh_session = SSHSession( 73 | hostname="jumphost", username="root", key_filename="/root/.ssh/id_rsa" 74 | ) 75 | 76 | # Verify the connection was established 77 | self.assertIsNotNone(ssh_session.ssh) 78 | self.assertTrue(ssh_session._check_alive()) 79 | 80 | # Test running a command on jumphost 81 | assert ssh_session.ssh is not None # for type checker 82 | stdin, stdout, stderr = ssh_session.ssh.exec_command("hostname") 83 | output = stdout.read().decode().strip() 84 | self.assertEqual(output, "jumphost") 85 | 86 | ssh_session.close() 87 | 88 | 89 | if __name__ == "__main__": 90 | unittest.main() 91 | -------------------------------------------------------------------------------- /tests/test_work_path/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/dpdispatcher/4816095c9e711259877fb90023ce74ce527ba5c3/tests/test_work_path/.gitkeep 
--------------------------------------------------------------------------------
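tests/test_ssh_jump_host.py above exercises SSHSession directly, but in normal use the same proxy_command reaches dpdispatcher through a machine definition with an SSH context (compare the ssh_proxy_command.json machine example shipped in the repository's examples directory). A minimal sketch of that user-facing form, with placeholder host names, user, key path, and roots, might look like:

from dpdispatcher import Machine

# A sketch only: the hosts, user name, key path, and roots are placeholders,
# and the remote_profile keys are assumed to mirror the SSHSession arguments
# exercised in tests/test_ssh_jump_host.py.
machine = Machine.load_from_dict(
    {
        "batch_type": "Slurm",
        "context_type": "SSHContext",
        "local_root": "./",
        "remote_root": "/home/user/dpdispatcher_work_dir/",
        "remote_profile": {
            "hostname": "server",
            "username": "user",
            "key_filename": "/home/user/.ssh/id_rsa",
            # The same kind of ProxyCommand string the test passes to
            # SSHSession: hop through the jump host to reach the cluster.
            "proxy_command": "ssh -W server:22 user@jumphost",
        },
    }
)

Submissions built on such a machine then route their SSH traffic through the jump host, which is the behaviour test_proxy_command_connection checks at the session level.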