├── .clang-format ├── .devcontainer ├── Dockerfile ├── README.md ├── constraints.txt ├── devcontainer.json ├── git_completion.bash ├── library-scripts │ └── common-debian.sh └── requirements.txt ├── .github └── workflows │ ├── black_lint.yaml │ ├── develop_comment.yaml │ ├── github-pages.yml │ ├── main_comment.yaml │ ├── pr_bot_grabber.py │ └── update_slack_pr_bot.yaml ├── .gitignore ├── .gitmodules ├── .nojekyll ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Jenkinsfile.py310 ├── Jenkinsfile.py311 ├── Jenkinsfile.py312 ├── LICENSE ├── README.md ├── SYSTEM_TESTS.md ├── deploy └── kubernetes │ ├── Dockerfile │ ├── deploy-dragon-with-hsta.sh │ ├── dragon-build-with-hsta-pod.yaml │ ├── dragon-pvc-develop.yaml │ └── management-pod.yaml ├── doc ├── .gitignore ├── Makefile ├── README.md ├── _static │ └── css │ │ └── custom.css ├── _templates │ └── autosummary │ │ ├── class.rst │ │ └── module.rst ├── benchmarks.rst ├── benchmarks │ ├── ddict.rst │ └── scipy_image.rst ├── cbook │ ├── ai-in-the-loop.rst │ ├── ai.rst │ ├── basic_pandarallel_demo.rst │ ├── bioinfo_alignment_pandarallel_demo.rst │ ├── c_channels_demo.rst │ ├── core.rst │ ├── data.rst │ ├── dict_torch_dataset.rst │ ├── distr-inf-telemetry.rst │ ├── dna_rna_dataloader.rst │ ├── dragon_joblib.rst │ ├── dragon_mpi_workflow.rst │ ├── dragon_native_pi.rst │ ├── dragon_native_policy_demo.rst │ ├── dragon_native_queue.rst │ ├── dragon_parsl_mpi_app.rst │ ├── dragon_telemetry.rst │ ├── images │ │ ├── PrimePipeline.jpg │ │ ├── ai-in-the-loop-workflow.jpg │ │ ├── dragon_dict_architecture.png │ │ ├── dragon_dict_results.png │ │ ├── dragon_mpi_workflow.puml │ │ ├── grafana-dashboards.png │ │ ├── grafana-imports.png │ │ ├── grafana-telem-dashboard.png │ │ ├── grafana-upload-json.png │ │ ├── llm-grafana-many-prompts.jpg │ │ ├── llm-grafana-single-prompt-response.jpg │ │ ├── llm-grafana-telem-data.jpg │ │ ├── sharedstate_pascal_triangle.jpg │ │ └── telemetry_deployment_diagram.jpg │ ├── mp_merge_sort.rst │ ├── 
mp_queue_demo.rst │ ├── mp_scipy_image.rst │ ├── multiprocessing.rst │ ├── pipeline.rst │ ├── shared_state_pascal_triangle.rst │ ├── telemetry.rst │ ├── torch-scipy-telemetry.rst │ └── workflows.rst ├── conf.py ├── devguide.rst ├── devguide │ ├── ddict.rst │ ├── glossary.rst │ ├── images │ │ ├── circlesquare.png │ │ ├── ddict_bringup.srms │ │ ├── ddict_clear.srms │ │ ├── ddict_contains.srms │ │ ├── ddict_get.srms │ │ ├── ddict_getLength.srms │ │ ├── ddict_keys.srms │ │ ├── ddict_overview.png │ │ ├── ddict_pop.srms │ │ ├── ddict_put.srms │ │ ├── ddict_teardown.srms │ │ ├── dragon_domain_model.puml │ │ ├── manager.png │ │ └── working_set.png │ ├── multiprocessing.rst │ └── runtime │ │ ├── architecture.rst │ │ ├── core.rst │ │ ├── global_services.rst │ │ ├── images │ │ ├── deployment_multi_node.puml │ │ ├── deployment_single_node.puml │ │ ├── global_services.puml │ │ ├── gsmonitor.graffle │ │ ├── managedservices.graffle │ │ ├── processstates.graffle │ │ ├── single_startup.srms │ │ ├── single_teardown.srms │ │ ├── singlenodeoverview.graffle │ │ ├── startup_seq_multi_node.puml │ │ ├── startup_seq_single_node.puml │ │ └── teardown_seq_multi_node.puml │ │ ├── index.rst │ │ ├── launcher.rst │ │ ├── local_services.rst │ │ ├── multi_node_deployment.rst │ │ ├── single_node_deployment.rst │ │ ├── telemetry.rst │ │ └── transport_agent.rst ├── faq.rst ├── images │ ├── ddict.png │ ├── ddict_128nodes.png │ ├── ddict_128to648nodes.png │ ├── dragon_api_stack.png │ ├── dragon_arch_organization.jpg │ ├── dragon_deployment.jpg │ ├── dragon_logo.png │ ├── dragon_sw_stack.jpg │ └── overview_queue_doc.jpg ├── index.rst ├── install.rst ├── make.bat ├── old_cython │ ├── channels.rst │ ├── dtypes.rst │ ├── fli.rst │ ├── heap.rst │ ├── heapmanager.rst │ ├── lock.rst │ ├── managed_memory.rst │ └── utils.rst ├── old_devguide │ ├── envvars.rst │ ├── gs_client.rst │ ├── images │ │ ├── api_use_core.puml │ │ ├── api_use_python.puml │ │ ├── architecture.puml │ │ ├── client_architecture.puml │ │ ├── 
infrastructure_architecture.puml │ │ ├── mpbridge_architecture.puml │ │ ├── mpbridge_class_diagram.puml │ │ ├── overview.graffle │ │ └── roundrobin.puml │ ├── infrastructure.rst │ ├── internal │ │ ├── communication │ │ │ ├── channels │ │ │ │ ├── channels.rst │ │ │ │ ├── channels_performance.rst │ │ │ │ ├── cy_channels.rst │ │ │ │ ├── gateway_channels.rst │ │ │ │ ├── images │ │ │ │ │ ├── .gitignore │ │ │ │ │ └── ChannelArchitecture.puml │ │ │ │ └── priority_heap.rst │ │ │ ├── communication.rst │ │ │ └── mrnet.rst │ │ ├── components │ │ │ ├── broadcast.rst │ │ │ ├── components.rst │ │ │ ├── images │ │ │ │ ├── .gitignore │ │ │ │ ├── bcast.graffle │ │ │ │ └── bcastflow.srms │ │ │ ├── managed_memory │ │ │ │ ├── bitset.rst │ │ │ │ ├── cy_managedmemory.rst │ │ │ │ ├── heapmanager.rst │ │ │ │ ├── hexdump.rst │ │ │ │ ├── images │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── heapallocations.graffle │ │ │ │ │ ├── heapfree1.graffle │ │ │ │ │ ├── heapfree2.graffle │ │ │ │ │ ├── heapfree3.graffle │ │ │ │ │ ├── heapfree4.graffle │ │ │ │ │ ├── heapfree5.graffle │ │ │ │ │ └── managed_memory.puml │ │ │ │ └── managed_memory.rst │ │ │ ├── scalable_locks │ │ │ │ ├── cy_scalable_locks.rst.needs_work │ │ │ │ ├── images │ │ │ │ │ └── scalable_locks.puml │ │ │ │ └── scalable_locks.rst.needs_work │ │ │ └── unordered_map.rst │ │ ├── infrastructure │ │ │ ├── bootstrapping.rst │ │ │ ├── conventional_ids.rst │ │ │ ├── images │ │ │ │ ├── .gitignore │ │ │ │ ├── infrastructure.puml │ │ │ │ ├── launchproc.srms │ │ │ │ ├── multi_cray_startup.srms │ │ │ │ ├── multi_cray_teardown.srms │ │ │ │ ├── multinodeoverview.pdf │ │ │ │ ├── overlay_network_fanout.puml │ │ │ │ └── singlenode.srms │ │ │ ├── infrastructure.rst │ │ │ ├── logging.rst │ │ │ ├── overlay_network.rst │ │ │ └── processes.rst │ │ ├── internal.rst │ │ ├── introduction.rst │ │ ├── ref │ │ │ ├── core │ │ │ │ ├── Cython │ │ │ │ │ ├── dtypes.rst │ │ │ │ │ ├── heap.rst │ │ │ │ │ ├── lock.rst │ │ │ │ │ └── utils.rst │ │ │ │ ├── c │ │ │ │ │ ├── bcast.rst 
│ │ │ │ │ ├── bitset.rst │ │ │ │ │ ├── hashtable.rst │ │ │ │ │ ├── heap_manager.rst │ │ │ │ │ ├── image │ │ │ │ │ │ └── bcastflow.puml │ │ │ │ │ ├── priority_heap.rst │ │ │ │ │ ├── shared_lock.rst │ │ │ │ │ ├── ulist.rst │ │ │ │ │ ├── umap.rst │ │ │ │ │ └── utils.rst │ │ │ │ ├── images │ │ │ │ │ └── core_architecture.puml │ │ │ │ └── index.rst │ │ │ └── ref.rst │ │ ├── services │ │ │ ├── images │ │ │ │ ├── .gitignore │ │ │ │ ├── MultiNodeOverview.graffle │ │ │ │ ├── PoolOverview.graffle │ │ │ │ ├── jupytermode.graffle │ │ │ │ ├── launcher_multi_node.puml │ │ │ │ ├── launcher_single_node.puml │ │ │ │ ├── launchercomponents.graffle │ │ │ │ ├── launcherstates.graffle │ │ │ │ ├── multinode.srms │ │ │ │ ├── server.srms │ │ │ │ ├── servermode.graffle │ │ │ │ ├── services.puml │ │ │ │ ├── shepherd.puml │ │ │ │ ├── shepherdstructure.graffle │ │ │ │ ├── singlenodelauncher.graffle │ │ │ │ └── teardown.srms │ │ │ ├── services.rst │ │ │ └── transport_agent │ │ │ │ ├── .gitignore │ │ │ │ ├── hsta │ │ │ │ └── hsta.rst │ │ │ │ ├── images │ │ │ │ ├── .gitignore │ │ │ │ ├── RemoteSendRecvStructure.graffle │ │ │ │ ├── TSTSComponents.graffle │ │ │ │ ├── channels.graffle │ │ │ │ ├── newchannelops.graffle │ │ │ │ ├── origchannelops.graffle │ │ │ │ ├── overview.graffle │ │ │ │ ├── overview2.graffle │ │ │ │ ├── recv.srms │ │ │ │ ├── send.srms │ │ │ │ └── transport_agent.puml │ │ │ │ ├── tcp.rst │ │ │ │ └── tcp │ │ │ │ ├── dragon.transport.tcp.agent.rst │ │ │ │ ├── dragon.transport.tcp.client.rst │ │ │ │ ├── dragon.transport.tcp.errno.rst │ │ │ │ ├── dragon.transport.tcp.io.rst │ │ │ │ ├── dragon.transport.tcp.messages.rst │ │ │ │ ├── dragon.transport.tcp.rst │ │ │ │ ├── dragon.transport.tcp.server.rst │ │ │ │ ├── dragon.transport.tcp.task.rst │ │ │ │ ├── dragon.transport.tcp.transport.rst │ │ │ │ ├── dragon.transport.tcp.util.rst │ │ │ │ └── index.rst │ │ ├── testing │ │ │ ├── images │ │ │ │ ├── .gitignore │ │ │ │ └── shepherdconnections.graffle │ │ │ └── testing.rst │ │ └── using_dragon 
│ │ │ ├── dragon_native.rst │ │ │ ├── images │ │ │ ├── .gitignore │ │ │ ├── dragon_with_dragon_native_api.puml │ │ │ ├── dragon_with_native.puml │ │ │ ├── dragon_with_python_multiprocessing.puml │ │ │ ├── global_architecture.puml │ │ │ └── python_multiprocessing.puml │ │ │ ├── python_multiprocessing.rst │ │ │ ├── running_dragon.rst │ │ │ └── using_dragon.rst │ ├── intro.rst │ ├── issues.rst │ ├── mpbridge.rst │ ├── native.rst │ ├── owner.rst │ ├── policy.rst │ ├── resource_model.rst │ ├── running.rst │ └── stack.rst ├── ref.rst ├── ref │ ├── ai │ │ └── index.rst │ ├── client │ │ ├── images │ │ │ ├── build.sh │ │ │ ├── channel_ChannelDescriptor_attach.srms │ │ │ ├── channel_ChannelDescriptor_detach.srms │ │ │ ├── channel_ChannelDescriptor_get_recvh.srms │ │ │ ├── channel_ChannelDescriptor_get_sendh.srms │ │ │ ├── channel_ChannelDescriptor_refresh.srms │ │ │ ├── channel_RecvChannel_close.srms │ │ │ ├── channel_RecvChannel_open.srms │ │ │ ├── channel_SendChannel_close.srms │ │ │ ├── channel_SendChannel_open.srms │ │ │ ├── channel_create.srms │ │ │ ├── channel_destroy.srms │ │ │ ├── channel_list.srms │ │ │ ├── channel_query.srms │ │ │ ├── process_ProcessInfo_refresh.srms │ │ │ ├── process_create.srms │ │ │ ├── process_join.srms │ │ │ ├── process_kill.srms │ │ │ ├── process_list.srms │ │ │ └── process_query.srms │ │ └── index.rst │ ├── core │ │ ├── c │ │ │ ├── channels.rst │ │ │ ├── channelsets.rst │ │ │ ├── fli.rst │ │ │ ├── image │ │ │ │ ├── bcast.png │ │ │ │ └── bcastflow.puml │ │ │ └── managed_memory.rst │ │ ├── images │ │ │ └── core_architecture.puml │ │ └── index.rst │ ├── data │ │ ├── C │ │ │ └── ddict.rst │ │ └── index.rst │ ├── inf │ │ ├── index.rst │ │ └── logging.rst │ ├── mpbridge │ │ └── multiprocessing.rst │ ├── native │ │ ├── C++ │ │ │ ├── index.rst │ │ │ └── queue.rst │ │ ├── C │ │ │ ├── control_structures.rst │ │ │ ├── index.rst │ │ │ └── queue.rst │ │ ├── Fortran │ │ │ ├── index.rst │ │ │ └── queue.rst │ │ └── index.rst │ ├── policy.rst │ ├── telemetry 
│ │ ├── dragon.telemetry.telemetry.rst │ │ └── index.rst │ └── workflows │ │ └── index.rst ├── start.rst ├── uses.rst └── uses │ ├── data_processing.rst │ ├── debugging.rst │ ├── distributed_training.rst │ ├── gpus.rst │ ├── grafana.rst │ ├── jupyter.rst │ ├── multinode.rst │ ├── orchestrate_mpi.rst │ ├── orchestrate_procs.rst │ └── workflow.rst ├── doc_spec ├── Makefile ├── README.md ├── client.rst ├── conf.py ├── images │ ├── channel_ChannelDescriptor_attach.srms │ ├── channel_ChannelDescriptor_detach.srms │ ├── channel_ChannelDescriptor_get_recvh.srms │ ├── channel_ChannelDescriptor_get_sendh.srms │ ├── channel_ChannelDescriptor_refresh.srms │ ├── channel_RecvChannel_close.srms │ ├── channel_RecvChannel_open.srms │ ├── channel_SendChannel_close.srms │ ├── channel_SendChannel_open.srms │ ├── channel_create.srms │ ├── channel_destroy.srms │ ├── channel_list.srms │ ├── channel_query.srms │ ├── process_ProcessInfo_refresh.srms │ ├── process_create.srms │ ├── process_join.srms │ ├── process_kill.srms │ ├── process_list.srms │ └── process_query.srms └── index.rst ├── dst ├── dragon_build.sh ├── manylinux2014_py310.yaml ├── manylinux2014_py311.yaml ├── manylinux2014_py312.yaml ├── runBuild.sh ├── runBuildPrep.general.sh ├── runBuildPrep.py310.sh ├── runBuildPrep.py311.sh ├── runBuildPrep.py312.sh └── runUnitTest.sh ├── examples ├── README.md ├── benchmarks │ └── gups_ddict.py ├── dragon_ai │ ├── README.md │ ├── dict_torch_dataset.py │ └── dna_rna_dataloader │ │ ├── dataset.py │ │ ├── doc2vec_DM.py │ │ ├── generate_synthetic_data.py │ │ ├── models.py │ │ ├── simulate.py │ │ └── test_doc2vec_DM.py ├── dragon_core │ ├── Makefile │ ├── README.md │ ├── logging.c │ ├── logging.py │ ├── performance │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── ch_p2p_bandwidth.c │ │ ├── ch_p2p_benchmark.py │ │ ├── ch_p2p_common.c │ │ ├── ch_p2p_common.h │ │ ├── ch_p2p_latency.c │ │ ├── ch_p2p_msg_rate.c │ │ └── run_ch_benchmark.sh │ ├── ring.py │ └── ringproc.c ├── dragon_data 
│ ├── README.md │ ├── ddict │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── block_size_bench.py │ │ ├── ddict_bench.py │ │ ├── ddict_bench_pg.py │ │ ├── ddict_checkpoint_pi.py │ │ ├── ddict_cpp_driver.py │ │ ├── ddict_pi_sim_aggregate.cpp │ │ ├── ddict_pi_sim_train.cpp │ │ ├── ddict_restart.py │ │ ├── demo_ddict.py │ │ ├── demo_ddict_manager_placement.py │ │ ├── demo_ddict_pool.py │ │ ├── serializable.cpp │ │ └── serializable.hpp │ ├── requirements.txt │ └── zarr_benchmark.py ├── dragon_gs_client │ ├── Makefile │ ├── README.md │ ├── connection_demo.py │ ├── dragon_group_create_addto_demo.py │ ├── dragon_group_demo.py │ ├── dragon_group_kill_demo.py │ ├── dragon_group_mpi_demo.py │ ├── dragon_popen_api.py │ ├── dragon_run_api.py │ ├── dragon_server_client_api.py │ ├── managed_mem_demo.py │ ├── mpi_hello.c │ ├── pi_demo.py │ ├── queue_demo.py │ └── requests.txt ├── dragon_native │ ├── README.md │ ├── mpi │ │ ├── Makefile │ │ ├── README.md │ │ ├── alltoall │ │ │ ├── COPYRIGHT │ │ │ ├── Makefile │ │ │ ├── osu_alltoall.c │ │ │ ├── osu_util.c │ │ │ ├── osu_util.h │ │ │ ├── osu_util_graph.c │ │ │ ├── osu_util_graph.h │ │ │ ├── osu_util_mpi.c │ │ │ ├── osu_util_mpi.h │ │ │ ├── osu_util_options.h │ │ │ ├── osu_util_papi.c │ │ │ ├── osu_util_papi.h │ │ │ └── osu_util_validation.c │ │ ├── hpc_workflow_demo.py │ │ ├── hpc_workflow_demo_highlevel.py │ │ ├── mpi_hello.c │ │ ├── mpi_process_group_demo.py │ │ └── policy_demo.py │ └── pi_demo.py ├── dragon_telemetry │ ├── README.md │ ├── merge_sort.py │ ├── scipy_scale_work.py │ └── telemetry.yaml ├── dragon_workflows │ ├── lazy_attach.py │ ├── mpi_hello.c │ ├── run_client.sh │ ├── run_server.sh │ └── server.py ├── jupyter │ ├── Ensemble.ipynb │ ├── JupyterDragon.ipynb │ ├── README.md │ ├── basic_pandarallel_demo.ipynb │ ├── bioinformatics_alignment_pandarallel_demo.ipynb │ ├── bioinformatics_alignment_pandarallel_multinode_demo.ipynb │ ├── doc_ref │ │ ├── basic_pandarallel_demo.py │ │ ├── bioinformatics_alignment_pandarallel_demo.py │ 
│ └── bioinformatics_alignment_pandarallel_multinode_demo.py │ ├── example.py │ ├── example_prometheus.yml │ ├── getout.py │ ├── imageproc.py │ ├── llm_backend.py │ ├── llm_example.ipynb │ ├── requirements_llm.txt │ └── telemetry.py ├── multiprocessing │ ├── README.md │ ├── aa_bench.py │ ├── distmerge.py │ ├── joblib │ │ ├── bench_auto_batching.py │ │ ├── compressor_comparison.py │ │ ├── delayed_comparison.py │ │ ├── memory_basic_usage.py │ │ ├── nested_parallel_memory.py │ │ ├── parallel_memmap.py │ │ ├── parallel_random_state.py │ │ └── serialization_and_wrappers.py │ ├── lock_performance.py │ ├── merge_sort.py │ ├── numpy-mpi4py-examples │ │ ├── README.md │ │ ├── mpi4py_aa_bench.py │ │ ├── mpi4py_p2p_lat.py │ │ ├── numpy_scale_work.py │ │ ├── parsl_batched_scipy_scale_work.py │ │ └── scipy_scale_work.py │ ├── p2p_bw.py │ ├── p2p_lat.py │ ├── perf.py │ ├── prime_numbers.py │ ├── queue_demo.py │ ├── scipy_image_demo.py │ ├── shared_state_pascal_triangle.py │ ├── torch-scipy-telemetry │ │ ├── README.md │ │ ├── conv.py │ │ ├── mnist.py │ │ ├── requirements.txt │ │ ├── telem.py │ │ └── telemetry_full.py │ └── unittests │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── common.py │ │ ├── mp_fork_bomb.py │ │ ├── orig │ │ ├── _test_multiprocessing.py │ │ └── _test_multiprocessing_spawn.py │ │ ├── test_array.py │ │ ├── test_barrier.py │ │ ├── test_condition.py │ │ ├── test_connection.py │ │ ├── test_containers.py │ │ ├── test_event.py │ │ ├── test_finalize.py │ │ ├── test_listener.py │ │ ├── test_lock.py │ │ ├── test_manager.py │ │ ├── test_others.py │ │ ├── test_poll.py │ │ ├── test_pool.py │ │ ├── test_process.py │ │ ├── test_queue.py │ │ ├── test_semaphore.py │ │ ├── test_shared_ctypes.py │ │ ├── test_shared_memory.py │ │ └── test_value.py ├── smartsim │ └── client_logging │ │ ├── Makefile │ │ ├── README.md │ │ ├── logging_shim.py │ │ ├── rand.cpp │ │ └── rand_failure.cpp └── workflows │ ├── ai-in-the-loop │ ├── Makefile │ ├── README.md │ ├── 
ai-in-the-loop.py │ ├── model.py │ ├── model_pretrained_poly.pt │ ├── sim-cheap.c │ └── sim-expensive.c │ ├── parsl │ ├── Makefile │ ├── README.md │ ├── factorial.c │ └── parsl_mpi_app_demo.py │ └── resiliency │ ├── bash_resiliency.py │ ├── dragon_resiliency.py │ ├── run_bash.sh │ └── run_dragon.sh ├── external └── Makefile ├── hack ├── VARIABLES ├── build ├── clean_build ├── cpython_setup ├── dragonbuild ├── load_cray_python.sh ├── mergelogs ├── script.gdb ├── setup └── where4all ├── pyproject.toml ├── related_work ├── README.md └── parallel_python_ray_numerical_computation │ ├── README.md │ ├── exec_per_node.sh │ ├── get_ips_from_json.py │ ├── manual_launch.sh │ ├── parallel_python_ray_numerical_computation.py │ └── requirements.txt ├── src ├── Doxyfile ├── Makefile ├── dragon │ ├── .gitignore │ ├── __init__.py │ ├── __main__.py │ ├── ai │ │ ├── __init__.py │ │ └── torch │ │ │ ├── __init__.py │ │ │ ├── dataloader_monkeypatch.py │ │ │ ├── dictdataset.py │ │ │ └── monkeypatching.py │ ├── channels.pxd │ ├── cli │ │ ├── __init__.py │ │ └── __main__.py │ ├── data │ │ ├── __init__.py │ │ ├── ddict │ │ │ ├── __init__.py │ │ │ ├── ddict.py │ │ │ ├── manager.py │ │ │ └── orchestrator.py │ │ └── zarr │ │ │ ├── __init__.py │ │ │ └── store.py │ ├── dlogging │ │ ├── __init__.py │ │ ├── log_setup.py │ │ ├── pydragon_logging.pyx │ │ └── util.py │ ├── dtypes_inc.pxd │ ├── globalservices │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── api_setup.py │ │ ├── channel.py │ │ ├── channel_int.py │ │ ├── group.py │ │ ├── group_int.py │ │ ├── node.py │ │ ├── node_int.py │ │ ├── policy_eval.py │ │ ├── pool.py │ │ ├── pool_int.py │ │ ├── process.py │ │ ├── process_int.py │ │ ├── server.py │ │ └── startup.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── channel_desc.py │ │ ├── config.py │ │ ├── connection.py │ │ ├── debug_support.py │ │ ├── facts.py │ │ ├── gpu_desc.py │ │ ├── group_desc.py │ │ ├── messages.py │ │ ├── minconnection.py │ │ ├── node_desc.py │ │ ├── parameters.py │ │ ├── 
policy.py │ │ ├── pool_desc.py │ │ ├── process_desc.py │ │ ├── standalone_conn.py │ │ ├── util.py │ │ └── watchers.py │ ├── jupyter │ │ └── server.py │ ├── launcher │ │ ├── __init__.py │ │ ├── backend.py │ │ ├── dragon_multi_be.py │ │ ├── dragon_multi_fe.py │ │ ├── dragon_single.py │ │ ├── frontend.py │ │ ├── include │ │ │ └── _pmsgqueue.h │ │ ├── launch_multi_ls.py │ │ ├── launch_selector.py │ │ ├── launchargs.py │ │ ├── network_config.py │ │ ├── pydragon_pmsgqueue.pyx │ │ ├── util.py │ │ └── wlm │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── k8s.py │ │ │ ├── pbs_pals.py │ │ │ ├── slurm.py │ │ │ └── ssh.py │ ├── localservices │ │ ├── __init__.py │ │ ├── local_svc.py │ │ ├── manager.py │ │ ├── options.py │ │ ├── process_desc.py │ │ └── server.py │ ├── managed_memory.pxd │ ├── mpbridge │ │ ├── __init__.py │ │ ├── connection.py │ │ ├── context.py │ │ ├── heap.py │ │ ├── managers.py │ │ ├── monkeypatching.py │ │ ├── pool.py │ │ ├── process.py │ │ ├── queues.py │ │ ├── reduction.py │ │ ├── shared_memory.py │ │ ├── sharedctypes.py │ │ ├── synchronize.py │ │ └── util.py │ ├── native │ │ ├── __init__.py │ │ ├── array.py │ │ ├── barrier.py │ │ ├── event.py │ │ ├── lock.py │ │ ├── logging.py │ │ ├── machine.py │ │ ├── pool.py │ │ ├── process.py │ │ ├── process_group.py │ │ ├── queue.py │ │ ├── semaphore.py │ │ └── value.py │ ├── pydragon_channels.pyx │ ├── pydragon_dtypes.pyx │ ├── pydragon_fli.pyx │ ├── pydragon_heap.pyx │ ├── pydragon_lock.pyx │ ├── pydragon_managed_memory.pyx │ ├── pydragon_perf.pyx │ ├── pydragon_pmod.pyx │ ├── pydragon_utils.pyx │ ├── telemetry │ │ ├── __init__.py │ │ ├── aggregator_app.py │ │ ├── analysis.py │ │ ├── collector.py │ │ ├── dragon_server.py │ │ ├── imports │ │ │ ├── Grafana_DragonTelemetryDashboard.json │ │ │ ├── Grafana_DragonTelemetryDashboard_custom_metrics.json │ │ │ ├── Grafana_DragonTelemetry_ResilientDashboard.json │ │ │ └── custom.yaml │ │ ├── progress_bar.py │ │ ├── telemetry.py │ │ ├── telemetry_head.py │ │ ├── tsdb_app.py │ 
│ └── tsdb_server.py │ ├── transport │ │ ├── __init__.py │ │ ├── ifaddrs.py │ │ ├── oob │ │ │ └── __init__.py │ │ ├── overlay │ │ │ └── __init__.py │ │ ├── tcp │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── agent.py │ │ │ ├── client.py │ │ │ ├── errno.py │ │ │ ├── io.py │ │ │ ├── messages.py │ │ │ ├── server.py │ │ │ ├── task.py │ │ │ ├── transport.py │ │ │ └── util.py │ │ ├── util.py │ │ └── x509.py │ └── workflows │ │ ├── __init__.py │ │ ├── parsl_batch_executor.py │ │ ├── parsl_executor.py │ │ ├── parsl_mpi_app.py │ │ └── runtime.py ├── include │ ├── Makefile │ └── dragon │ │ ├── bcast.h │ │ ├── channels.h │ │ ├── channelsets.h │ │ ├── ddict.h │ │ ├── dictionary.hpp │ │ ├── fli.h │ │ ├── global_types.h │ │ ├── managed_memory.h │ │ ├── messages.hpp │ │ ├── perf.h │ │ ├── pmod.h │ │ ├── queue.h │ │ ├── queue.hpp │ │ ├── return_codes.h │ │ ├── return_codes_map_maker.py │ │ ├── shared_lock.h │ │ └── utils.h ├── lib │ ├── Makefile │ ├── _bcast.h │ ├── _bitset.h │ ├── _blocks.h │ ├── _channels.h │ ├── _channelsets.h │ ├── _ddict.hpp │ ├── _fli.h │ ├── _hashtable.h │ ├── _heap_manager.h │ ├── _hexdump.h │ ├── _managed_memory.h │ ├── _pals.h │ ├── _pmod.h │ ├── _queue.h │ ├── _shared_lock.h │ ├── _utils.h │ ├── bcast.c │ ├── bitset.c │ ├── blocks.c │ ├── channels.c │ ├── channels_messages.c │ ├── channelsets.c │ ├── ddict.cpp │ ├── err.h │ ├── fli.c │ ├── gpu │ │ ├── cuda.cpp │ │ ├── cuda.hpp │ │ ├── gpu.cpp │ │ ├── gpu.hpp │ │ ├── hip.cpp │ │ ├── hip.hpp │ │ ├── ze.cpp │ │ └── ze.hpp │ ├── hashtable.c │ ├── heap_manager.c │ ├── hexdump.c │ ├── hostid.h │ ├── logging.c │ ├── logging.h │ ├── managed_memory.c │ ├── message_defs.capnp │ ├── message_tcs_to_enum.py │ ├── messages.cpp │ ├── pals.c │ ├── perf.cpp │ ├── pmod_pals.c │ ├── pmod_recv_mpi.c │ ├── pmod_send_mpi.c │ ├── priority_heap.c │ ├── priority_heap.h │ ├── queue.c │ ├── queue.cpp │ ├── queue.f90 │ ├── shared_lock.c │ ├── shared_lock.h │ ├── shared_lock.hpp │ ├── ulist.cpp │ ├── ulist.h │ ├── umap.cpp │ 
├── umap.h │ └── utils.c ├── pkg │ ├── CHANGELOG.md │ ├── LICENSE.md │ ├── Makefile │ ├── README.md │ └── RELEASE_NOTES.md ├── pyproject.toml ├── requirements.txt ├── requirements_examples.txt ├── setup.py └── tools │ ├── dragon-cleanup │ ├── dragon-flame-graph │ └── dragon-node-cleanup └── test ├── .gitignore ├── MANIFEST ├── Makefile ├── _ctest_utils.h ├── ai └── torch │ ├── README.md │ └── test_pytorch_patches.py ├── all_mp_unittest.txt ├── broadcast ├── .gitignore ├── Makefile ├── perf_bcast.c └── test_bcast.c ├── ccfutures ├── executor.py ├── test │ ├── __init__.py │ └── support │ │ ├── __init__.py │ │ ├── _hypothesis_stubs │ │ ├── __init__.py │ │ ├── _helpers.py │ │ └── strategies.py │ │ ├── ast_helper.py │ │ ├── asynchat.py │ │ ├── asyncore.py │ │ ├── bytecode_helper.py │ │ ├── hashlib_helper.py │ │ ├── hypothesis_helper.py │ │ ├── i18n_helper.py │ │ ├── import_helper.py │ │ ├── interpreters │ │ ├── __init__.py │ │ ├── _crossinterp.py │ │ ├── channels.py │ │ └── queues.py │ │ ├── logging_helper.py │ │ ├── os_helper.py │ │ ├── pty_helper.py │ │ ├── refleak_helper.py │ │ ├── script_helper.py │ │ ├── smtpd.py │ │ ├── socket_helper.py │ │ ├── strace_helper.py │ │ ├── testcase.py │ │ ├── threading_helper.py │ │ ├── venv.py │ │ └── warnings_helper.py ├── test_as_completed.py ├── test_deadlock.py ├── test_future.py ├── test_init.py ├── test_interpreter_pool.py ├── test_process_pool.py ├── test_shutdown.py ├── test_thread_pool.py ├── test_wait.py └── util.py ├── channels_subtests ├── .gitignore ├── Makefile ├── ch1.py ├── ch2.py ├── ch_ex.py ├── perf_channels.py ├── perf_fch.c ├── test_basic_channels.py ├── test_bch.c ├── test_capnp.cpp ├── test_capnp.py ├── test_ch.c ├── test_channelsets.c ├── test_fli.c ├── test_fli.py ├── test_gateway_messages.c ├── test_gateways.c ├── test_peek_pop.c ├── test_poll.c ├── test_send.c └── test_wrong.c ├── connection ├── basic_example.py ├── conn_arg_test_target.py ├── connection_obj_tests.py ├── simple_speed.py ├── 
standalone_conn_bench.py ├── standalone_conn_bench_aa.py ├── standalone_conn_example.py └── standalone_conn_test.py ├── debug ├── example.py └── tester.py ├── globalservices ├── __init__.py ├── group_api.py ├── jumbo_arg_test_target.py ├── managed_proc_test_target.py ├── process_api.py ├── single_internal.py ├── single_process_msg.py ├── test_policy_eval.py └── test_refcounting.py ├── gpu └── test_gpu.cpp ├── hashtable ├── .gitignore ├── Makefile └── test_hashtable.c ├── heapmanager ├── .gitignore ├── Makefile └── test_heapmanager.c ├── infrastructure ├── MANIFEST ├── env_parameter_tests.py ├── newline_stream_wrapper_test.py ├── test_gpu_desc.py └── test_policy.py ├── integration ├── aa_bench.py ├── single_example.py ├── slow_echo.sh ├── smoke.py ├── start_single.py └── start_single_ls.py ├── launcher ├── backend_testing_mocks.py ├── bad_hostfile.txt ├── frontend_testing_mocks.py ├── good_hostfile.txt ├── launcher_testing_utils.py ├── slurm.json ├── slurm.yaml ├── slurm_bad.json ├── slurm_bad.yaml ├── slurm_big.yaml ├── slurm_primary.yaml ├── test_backend_bringup.py ├── test_frontend_bringup.py ├── test_launch_options.py ├── test_network_config.py ├── test_resilient_restart.py └── test_signal_handling.py ├── launcher_multi ├── cleanup ├── helloworld.py ├── runhello.sh ├── runtest.sh ├── runtransporttest.sh ├── serial_compute.py ├── test_tcp_bringup │ ├── runhello.sh │ ├── test_ifaddr.py │ ├── test_launcher_be.py │ └── test_launcher_fe.py └── transport_test.py ├── localservices └── smoke.py ├── minconnection ├── basic_example.py ├── minconnection_obj_tests.py └── test_minconnection.py ├── mp_bench ├── automation_input │ ├── access_managed_dict_test_input.csv │ ├── basic_managed_dict_test_input.csv │ ├── basic_parameter_study_input.csv │ ├── basic_pool_test_input.csv │ ├── basic_process_test_input.csv │ ├── chain_token_pipe_test_input.csv │ ├── chain_token_test_input.csv │ ├── chain_token_test_simple_input.csv │ ├── pool_invocation_test_input.csv │ ├── 
second_parameter_study_input.csv │ └── send_receive_test_input.csv ├── basic_parameter_study.py ├── doc │ ├── .gitignore │ ├── Makefile │ ├── benchmarks │ │ ├── chain_token.rst │ │ ├── chain_token_pipe.rst │ │ ├── managed_dict_basic.rst │ │ ├── pool_basic.rst │ │ └── process_basic.rst │ ├── conf.py │ ├── generalities.rst │ └── index.rst ├── scalable_mp_benchmarks.py ├── setup.sh ├── tests │ ├── access_managed_dict_test.py │ ├── basic_managed_dict_test.py │ ├── basic_pool_test.py │ ├── basic_process_test.py │ ├── chain_token_pipe_test.py │ ├── chain_token_test.py │ ├── chain_token_test_simple.py │ ├── pool_invocation_test.py │ └── send_receive_test.py ├── user_parameter_study │ ├── Makefile │ ├── monte_carlo_pi_param_study.py │ ├── user_executable.c │ ├── user_executable.py │ └── user_parameter_study.py └── util │ ├── .gitignore │ └── test_util.py ├── mpbridge ├── test_api.py ├── test_array.py ├── test_barrier.py ├── test_condition.py ├── test_heap.py ├── test_lock.py ├── test_mpbridge_basic.py ├── test_mpbridge_context_wait.py ├── test_pipe_with_process.py ├── test_pool.py ├── test_process.py ├── test_queue.py └── test_value.py ├── multi-node ├── Makefile ├── c_ddict.c ├── cleanup.sh ├── cpp_ddict.cpp ├── mpi_hello.c ├── test_array.py ├── test_barrier.py ├── test_connection.py ├── test_distdict.py ├── test_distdict_c.py ├── test_fli.py ├── test_hsta.py ├── test_lock.py ├── test_machine.py ├── test_mpi_hello_world.py ├── test_pool.py ├── test_process.py ├── test_process_group.py ├── test_queue.py ├── test_runtime_restart.py ├── test_value.py └── test_zarr_store.py ├── native ├── Makefile ├── c_ddict.c ├── cpp_ddict.cpp ├── filenames.txt ├── flimsgfrom.cpp ├── flimsgfrom.py ├── flimsgto.cpp ├── test_array.py ├── test_barrier.py ├── test_ddict.py ├── test_ddict_c_driver.py ├── test_ddict_cpp_driver.py ├── test_event.py ├── test_lock.py ├── test_logging.py ├── test_machine.py ├── test_msgs.py ├── test_pool.py ├── test_process.py ├── test_process_group.py ├── 
test_queue.py ├── test_redirection.py ├── test_semaphore.py └── test_value.py ├── pkg ├── Makefile └── README.md ├── pmod ├── .gitignore ├── Makefile └── test_pmod.c ├── pmsgq ├── .gitignore ├── Makefile ├── ex.py └── test_pmsgq.c ├── process ├── firstlight.py ├── hello.py ├── mphello.py └── nativeprocess.py ├── release ├── hello.py ├── test_bounce.sh ├── test_mpi_wrkflw.sh ├── test_scipy_img_scale.sh └── test_scipy_img_scale.slurm ├── repeat_to_failure ├── setup.sh ├── shepherd ├── README.md ├── gs_stub.py ├── proc1.py ├── proc2.py ├── proc3.py ├── proc4.py ├── procenv1.py └── procenv2.py ├── shim_dragon_paths.py ├── skipped_test_launcher.py ├── skipped_test_msgqueue.py ├── support ├── __init__.py └── util.py ├── telemetry ├── telemetry_data.py ├── test_aggregator_app.py ├── test_analysis.py ├── test_collector.py ├── test_dragon_server.py ├── test_tsdb_app.py └── test_tsdb_server.py ├── test_c_files.py ├── test_channels.py ├── test_connection.py ├── test_distdict.py ├── test_globalservices.py ├── test_infrastructure.py ├── test_integration_shep_gs.py ├── test_launcher.py ├── test_ls_multi.py ├── test_mpbridge.py ├── test_native.py ├── test_policy.py ├── test_shepherd.py ├── test_telemetry.py ├── test_transport.py ├── test_utils.py ├── transport ├── tcp │ ├── test_address.py │ ├── test_agent.py │ ├── test_client.py │ ├── test_errno.py │ ├── test_gateway_message.py │ ├── test_io.py │ ├── test_messages.py │ ├── test_server.py │ ├── test_stream_transport.py │ ├── test_streams.py │ ├── test_task.py │ ├── test_transport.py │ └── test_util.py ├── test_lsif.py ├── test_tcp_transport.py └── test_x509.py └── utils ├── .gitignore ├── Makefile ├── files.txt ├── lock_bench.c ├── test_attach.c ├── test_basic_mempool.py ├── test_blocks.c ├── test_heap.c ├── test_locks.py ├── test_log.c ├── test_logging.py ├── test_mem.c ├── test_mempool.py ├── test_pyheap.py ├── test_queue.c ├── test_serialized_uid.c ├── test_threaded_lock.c ├── ulist.c └── umap.c /.clang-format: 
-------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Mozilla 3 | BreakBeforeBraces: Linux 4 | ColumnLimit: "110" 5 | UseTab: Never 6 | IndentWidth: "4" 7 | BinPackArguments: true 8 | BinPackParameters: true 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | SpacesInConditionalStatement: False 11 | PointerAlignment: Left 12 | -------------------------------------------------------------------------------- /.devcontainer/README.md: -------------------------------------------------------------------------------- 1 | See https://github.com/microsoft/vscode-dev-containers and 2 | https://github.com/devcontainers/features for more information. 3 | 4 | 5 | # Speed up command prompt 6 | 7 | Using Git in a devcontainer may be problematic on MacOS or Windows since the 8 | runtime (e.g., Docker Desktop, Podman) will actually run containers in a Linux 9 | VM. Refreshing the Git index (e.g., by running `git status`) may alleviate the 10 | issue, at least until it is refreshed on the host. 11 | 12 | Since the codespaces theme includes Git status in the shell prompt by default, 13 | it may seem like a devcontainer is slow or unresponsive with each prompt. The 14 | most effective way to prevent this is simply to disable Git in the command 15 | prompt. Run the following to update Git's configuration in your working tree: 16 | 17 | ``` 18 | $ git config codespaces-theme.hide-status 1 19 | ``` 20 | 21 | See https://github.com/devcontainers/features/tree/main/src/common-utils for more details. 22 | -------------------------------------------------------------------------------- /.devcontainer/git_completion.bash: -------------------------------------------------------------------------------- 1 | if [ -f ~/.git-completion.bash ]; then 2 | . 
~/.git-completion.bash 3 | fi -------------------------------------------------------------------------------- /.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cpu 2 | --trusted-host download.pytorch.org 3 | alive-progress>=3.2.0 4 | auditwheel>=6.2.0 5 | black>=24.8.0 6 | breathe>=4.35.0 7 | cloudpickle>=3.0.0 8 | cryptography>=43.0.1 9 | Cython==3.0.12 10 | flask>=3.0.3 11 | gunicorn>=23.0.0 12 | numpy>=2.0.2 13 | numcodecs>=0.13.1,<0.16.0 14 | parameterized>=0.9.0 15 | parsl>=2024.9.23 16 | psutil>=6.0.0 17 | pycapnp>=2.0.0 18 | pynvml>=12.0.0 19 | PyYAML>=6.0.2 20 | requests>=2.32.3 21 | setuptools>=75.6.0 22 | six>=1.17.0 23 | Sphinx>=7.4.7 24 | sphinx-fortran>=1.1.1 25 | sphinx-rtd-theme>=2.0.0 26 | sphinx-copybutton>=0.5.2 27 | sphinxcontrib-plantuml>=0.3.0 28 | sphinx-new-tab-link>=0.7.0 29 | torch>=2.0.0 30 | wheel>=0.45.1 31 | kubernetes>=32.0.0 32 | zarr==2.18.3 33 | -------------------------------------------------------------------------------- /.github/workflows/black_lint.yaml: -------------------------------------------------------------------------------- 1 | name: Black Linter 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: [self-hosted] 8 | steps: 9 | - uses: actions/checkout@v4 10 | #- name: Set up Python 11 | # uses: actions/setup-python@v4 12 | # with: 13 | # python-version: "3.11.11" 14 | 15 | - name: Install Black 16 | run: | 17 | module load cray-python 18 | python3 --version 19 | pip install "black>=25.1.0" 20 | black --version 21 | 22 | - name: Check Src 23 | run: | 24 | pwd 25 | set -e 26 | cd ./src 27 | pwd 28 | black . --check --diff --color 29 | 30 | - name: Check Tests 31 | run: | 32 | pwd 33 | set -e 34 | cd ./test 35 | pwd 36 | black . 
--check --diff --color 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /.github/workflows/update_slack_pr_bot.yaml: -------------------------------------------------------------------------------- 1 | # This workflow runs a cron job that updates slack with current PR statuses 2 | 3 | name: Dragon PR grabber 4 | 5 | on: 6 | workflow_dispatch: 7 | schedule: 8 | - cron: "54 13 * * 1-5" 9 | 10 | permissions: 11 | contents: read 12 | 13 | # Run on pe27rome with a runner going from nhill's account 14 | jobs: 15 | pr-notifier-bot: 16 | runs-on: [ self-hosted ] 17 | 18 | steps: 19 | - name: Run Pull Request Grabber and notify Slack 20 | run: | 21 | module swap PrgEnv-cray PrgEnv-gnu 22 | module load cray-python 23 | python3 -m venv _env 24 | . _env/bin/activate 25 | python3 -m pip install PyGithub 26 | python3 .github/workflows/pr_bot_grabber.py 27 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/.nojekyll -------------------------------------------------------------------------------- /Jenkinsfile.py310: -------------------------------------------------------------------------------- 1 | @Library("dst-shared-2@master") _ 2 | 3 | buildRunner { 4 | product = "dragon" 5 | buildAgent = "dragon-dst-build" 6 | timeout = 90 7 | buildScript = "dst/dragon_build.sh -n -p 310" 8 | } 9 | -------------------------------------------------------------------------------- /Jenkinsfile.py311: -------------------------------------------------------------------------------- 1 | @Library("dst-shared-2@master") _ 2 | 3 | buildRunner { 4 
| product = "dragon" 5 | buildAgent = "dragon-dst-build" 6 | timeout = 90 7 | buildScript = "dst/dragon_build.sh -n -p 311" 8 | } 9 | -------------------------------------------------------------------------------- /Jenkinsfile.py312: -------------------------------------------------------------------------------- 1 | @Library("dst-shared-2@master") _ 2 | 3 | buildRunner { 4 | product = "dragon" 5 | buildAgent = "dragon-dst-build" 6 | timeout = 90 7 | buildScript = "dst/dragon_build.sh -n -p 312" 8 | } 9 | -------------------------------------------------------------------------------- /deploy/kubernetes/dragon-pvc-develop.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: dragon-develop-pvc 5 | spec: 6 | accessModes: 7 | - ReadWriteMany 8 | resources: 9 | requests: 10 | storage: 30Gi 11 | storageClassName: standard-file -------------------------------------------------------------------------------- /deploy/kubernetes/management-pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: management-pod 5 | spec: 6 | containers: 7 | - name: management-container 8 | image: mkalantzi/dragondev:latest 9 | command: ["/bin/bash", "-c"] 10 | args: 11 | - | 12 | apt upgrade -y && apt update && \ 13 | # cd dragon-develop && \ 14 | # cd hpc-pe-dragon-dragon-k8s-merge-develop && \ 15 | # . 
hack/setup && \ 16 | sleep 1000000 17 | volumeMounts: 18 | - mountPath: /dragon-develop 19 | name: dragon-develop-volume1 20 | nodeSelector: 21 | kubernetes.io/hostname: svc-02 22 | volumes: 23 | - name: dragon-develop-volume1 24 | persistentVolumeClaim: 25 | claimName: dragon-develop-pvc 26 | restartPolicy: Never -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | *.png 3 | !images/ddict.png 4 | !images/ddict_128nodes.png 5 | !images/ddict_128to648nodes.png 6 | plantuml.jar 7 | ref/dragon*.rst 8 | ref/ai/dragon*.rst 9 | ref/telemetry/dragon*.rst 10 | ref/core/dragon*.rst 11 | ref/client/dragon*.rst 12 | ref/inf/dragon*.rst 13 | ref/native/dragon*.rst 14 | ref/native/Python/dragon*.rst 15 | ref/mpbridge/dragon*.rst 16 | ref/data/Python/dragon*.rst 17 | ref/data/dragon*.rst 18 | ref/workflows/dragon*.rst 19 | ref/native/Python/dragon*.rst 20 | *.svg 21 | internal/services/transport_agent/tcp/* 22 | -------------------------------------------------------------------------------- /doc/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. autoclass:: {{ objname }} 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | :inherited-members: 10 | 11 | {% block methods %} 12 | .. automethod:: __init__ 13 | 14 | {% if methods %} 15 | .. rubric:: {{ _('Methods') }} 16 | 17 | .. autosummary:: 18 | {% for item in methods %} 19 | ~{{ name }}.{{ item }} 20 | {%- endfor %} 21 | {% endif %} 22 | {% endblock %} 23 | 24 | {% block attributes %} 25 | {% if attributes %} 26 | .. rubric:: {{ _('Attributes') }} 27 | 28 | .. 
autosummary:: 29 | {% for item in attributes %} 30 | ~{{ name }}.{{ item }} 31 | {%- endfor %} 32 | {% endif %} 33 | {% endblock %} 34 | -------------------------------------------------------------------------------- /doc/benchmarks.rst: -------------------------------------------------------------------------------- 1 | .. _benchmarks: 2 | 3 | Benchmarks 4 | ++++++++++ 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | benchmarks/scipy_image.rst 10 | benchmarks/ddict.rst -------------------------------------------------------------------------------- /doc/benchmarks/ddict.rst: -------------------------------------------------------------------------------- 1 | Distributed Dictionary Performance 2 | ++++++++++++++++++++++++++++++++++ 3 | 4 | .. code-block:: console 5 | 6 | dragon gups_ddict.py --nclients=8192 --managers_per_node=2 --num_nodes=128 --total_mem_size=128 --mem_frac=0.6 --iterations=6 7 | 8 | 9 | .. figure:: ../images/ddict_128nodes.png 10 | :align: center 11 | :scale: 40 % 12 | 13 | 14 | .. code-block:: console 15 | 16 | dragon gups_ddict.py --nclients=8192 --managers_per_node=2 --num_nodes=128 --total_mem_size=16384 --mem_frac=0.6 --iterations=1 --value_size_min=16777216 --value_size_max=67108864 17 | dragon gups_ddict.py --nclients=16384 --managers_per_node=2 --num_nodes=256 --total_mem_size=32768 --mem_frac=0.6 --iterations=1 --value_size_min=16777216 --value_size_max=67108864 18 | dragon gups_ddict.py --nclients=32768 --managers_per_node=2 --num_nodes=512 --total_mem_size=65536 --mem_frac=0.6 --iterations=1 --value_size_min=16777216 --value_size_max=67108864 19 | 20 | 21 | .. figure:: ../images/ddict_128to648nodes.png 22 | :align: center 23 | :scale: 40 % -------------------------------------------------------------------------------- /doc/cbook/ai.rst: -------------------------------------------------------------------------------- 1 | AI 2 | ++ 3 | 4 | .. 
toctree:: 5 | :maxdepth: 1 6 | 7 | ai-in-the-loop.rst 8 | dict_torch_dataset.rst 9 | dna_rna_dataloader.rst 10 | -------------------------------------------------------------------------------- /doc/cbook/basic_pandarallel_demo.rst: -------------------------------------------------------------------------------- 1 | Basic Pandarallel Demonstration for Single Node Environment 2 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 | 4 | This Jupyter benchmark is a simple use case for the pandarallel `parallel_apply` call. 5 | It can be run with `dragon` and base multiprocessing to compare performance on your machine. 6 | 7 | The program demonstrates how to use `parallel_apply`, the multiprocessing version of pandas `apply`, on a pandas dataframe with random input. 8 | 9 | The code demonstrates the following key concepts working with Dragon: 10 | 11 | * How to write basic programs that can run with Dragon and base multiprocessing 12 | * How to use pandarallel and pandas with Dragon and base multiprocessing 13 | * How pandarallel handles various dtypes 14 | 15 | .. literalinclude:: ../../examples/jupyter/doc_ref/basic_pandarallel_demo.py -------------------------------------------------------------------------------- /doc/cbook/core.rst: -------------------------------------------------------------------------------- 1 | Core 2 | ++++ 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | dragon_native_pi.rst 8 | c_channels_demo.rst 9 | dragon_native_queue.rst 10 | dragon_native_policy_demo.rst 11 | -------------------------------------------------------------------------------- /doc/cbook/data.rst: -------------------------------------------------------------------------------- 1 | Dragon Data 2 | +++++++++++ 3 | 4 | .. .. toctree:: 5 | .. :maxdepth: 1 6 | 7 | .. 
dragon_dict.rst -------------------------------------------------------------------------------- /doc/cbook/images/PrimePipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/PrimePipeline.jpg -------------------------------------------------------------------------------- /doc/cbook/images/ai-in-the-loop-workflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/ai-in-the-loop-workflow.jpg -------------------------------------------------------------------------------- /doc/cbook/images/dragon_dict_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/dragon_dict_architecture.png -------------------------------------------------------------------------------- /doc/cbook/images/dragon_dict_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/dragon_dict_results.png -------------------------------------------------------------------------------- /doc/cbook/images/dragon_mpi_workflow.puml: -------------------------------------------------------------------------------- 1 | ' machine for the Dragon native Pool() implementation 2 | 3 | 4 | @startuml 5 | skinparam componentStyle uml2 6 | skinparam shadowing false 7 | skinparam monochrome true 8 | 9 | ' skinparam linetype ortho 10 | 'skinparam linetype polyline 11 | ' skinparam nodesep 10 12 | ' skinparam ranksep 20 13 | 14 | (*) --> ===B1=== 15 | (*) --> Consumer 16 | ===B1=== --> "Producer 1" 17 | ===B1=== --> "Producer 2" 18 | 
"Producer 1" --> "MPI Applications" as mpi1 19 | "Producer 1" --> "Parser" as parser1 20 | "Producer 2" --> "MPI Applications" as mpi2 21 | "Producer 2" --> "Parser" as parser2 22 | "Producer 1" -[hidden]r-> "Producer 2" 23 | mpi1 ..>[stdout of\nhead process] "Parser" as parser1 24 | mpi2 ..>[stdout of\nhead process] "Parser" as parser2 25 | parser1 ..>[Put parsed results\ninto shared queue] Consumer 26 | parser2 ..>[Put parsed results\ninto shared queue] Consumer 27 | parser1 -[hidden]d-> ===B2=== 28 | parser2 -[hidden]d-> ===B2=== 29 | "Producer 1" --> ===B2=== 30 | "Producer 2" --> ===B2=== 31 | ===B1=== -[hidden]-> ===B2=== 32 | ===B2=== --> Consumer 33 | Consumer -d-> (*) 34 | 35 | @enduml -------------------------------------------------------------------------------- /doc/cbook/images/grafana-dashboards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/grafana-dashboards.png -------------------------------------------------------------------------------- /doc/cbook/images/grafana-imports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/grafana-imports.png -------------------------------------------------------------------------------- /doc/cbook/images/grafana-telem-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/grafana-telem-dashboard.png -------------------------------------------------------------------------------- /doc/cbook/images/grafana-upload-json.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/grafana-upload-json.png -------------------------------------------------------------------------------- /doc/cbook/images/llm-grafana-many-prompts.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/llm-grafana-many-prompts.jpg -------------------------------------------------------------------------------- /doc/cbook/images/llm-grafana-single-prompt-response.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/llm-grafana-single-prompt-response.jpg -------------------------------------------------------------------------------- /doc/cbook/images/llm-grafana-telem-data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/llm-grafana-telem-data.jpg -------------------------------------------------------------------------------- /doc/cbook/images/sharedstate_pascal_triangle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/sharedstate_pascal_triangle.jpg -------------------------------------------------------------------------------- /doc/cbook/images/telemetry_deployment_diagram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/cbook/images/telemetry_deployment_diagram.jpg -------------------------------------------------------------------------------- 
/doc/cbook/multiprocessing.rst: -------------------------------------------------------------------------------- 1 | Multiprocessing with Dragon 2 | +++++++++++++++++++++++++++ 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | mp_merge_sort.rst 8 | mp_queue_demo.rst 9 | mp_scipy_image.rst 10 | torch-scipy-telemetry.rst 11 | distr-inf-telemetry.rst 12 | pipeline.rst 13 | shared_state_pascal_triangle.rst 14 | -------------------------------------------------------------------------------- /doc/cbook/telemetry.rst: -------------------------------------------------------------------------------- 1 | Telemetry 2 | +++++++++ 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | dragon_telemetry.rst -------------------------------------------------------------------------------- /doc/cbook/workflows.rst: -------------------------------------------------------------------------------- 1 | Workflows with Dragon 2 | +++++++++++++++++++++ 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | ai-in-the-loop.rst 8 | dragon_mpi_workflow.rst 9 | dragon_parsl_mpi_app.rst 10 | dragon_joblib.rst -------------------------------------------------------------------------------- /doc/devguide/images/circlesquare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/images/circlesquare.png -------------------------------------------------------------------------------- /doc/devguide/images/ddict_clear.srms: -------------------------------------------------------------------------------- 1 | 8 4 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes clear to all managers"; 8 | ltext@ c "Then all managers remove all key-value pairs"; 9 | ltext@ c "Client receives responses from managers"; 10 | ; 11 | c m1 "DDClear(clientID, chkptID) sent to manager1 main fli."; 12 | c m2 "DDClear(clientID, chkptID) sent to 
manager2 main fli."; 13 | ; 14 | m1 c "DDClearResponse(status=DRAGON_SUCCESS), sent to client's buffered fli"; 15 | m2 c "DDClearResponse(status=DRAGON_SUCCESS), sent to client's buffered fli"; -------------------------------------------------------------------------------- /doc/devguide/images/ddict_contains.srms: -------------------------------------------------------------------------------- 1 | 8 4 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes to check key existence"; 8 | ltext@ c "Then it hashes the key once key"; 9 | ltext@ c "It chooses the manager by computing the remainder of"; 10 | ltext@ c "dividing by the number of managers."; 11 | ; 12 | c m2 "DDContains(clientID, chkptID) sent to chosen manager main fli."; 13 | c m2 "Key - not a message"; 14 | c m2 "EOT (low-level fli protocol)"; 15 | m2 c "DDContainsResponse(status=DRAGON_SUCCESS) sent to client's buffered fli"; -------------------------------------------------------------------------------- /doc/devguide/images/ddict_get.srms: -------------------------------------------------------------------------------- 1 | 8 5 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes get on a key"; 8 | ltext@ c "The Client buffers all writes for the key value."; 9 | ltext@ c "Then it hashes the key once key writing is complete"; 10 | ltext@ c "It chooses the manager by computing the remainder of"; 11 | ltext@ c "dividing by the number of managers."; 12 | ; 13 | c m2 "DDGet(clientID, chkptID) sent to chosen manager main fli."; 14 | c m2 "Key value is written on one write - not a message"; 15 | c m2 "Send handle closed resulting in EOT (low-level fli protocol)"; 16 | m2 c "DDGetResponse(err=DRAGON_SUCCESS) sent to client RespFLI"; 17 | m2 c "Value part 1"; 18 | m2 c "Value part ..."; 19 | m2 c "Value part n"; 20 | m2 c "EOT (low-level fli protocol)"; 
-------------------------------------------------------------------------------- /doc/devguide/images/ddict_getLength.srms: -------------------------------------------------------------------------------- 1 | 8 4 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes get length from all managers"; 8 | ltext@ c "Then all managers send their length of kvs to client"; 9 | ltext@ c "Client sums up the lengths"; 10 | ; 11 | c m1 "DDLength(clientID, chkptID) sent to all managers' main fli."; 12 | c m2 "DDLength(clientID, chkptID) sent to all managers' main fli."; 13 | ; 14 | m1 c "DDLengthResponse(status=DRAGON_SUCCESS) sent to client's buffered fli"; 15 | m2 c "DDLengthResponse(status=DRAGON_SUCCESS) sent to client's buffered fli"; 16 | -------------------------------------------------------------------------------- /doc/devguide/images/ddict_keys.srms: -------------------------------------------------------------------------------- 1 | 8 6 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes to get all keys from all managers"; 8 | ltext@ c "Then all managers send their keys to client"; 9 | ltext@ c "Client gathers keys and returns them"; 10 | ; 11 | c m1 "DDKeys(clientID, chkptID) sent to manager1 main fli."; 12 | c m1 "EOT (low-level fli protocol)"; 13 | m1 c "DDKeysResponse(status=DRAGON_SUCCESS, keys_length=n)"; 14 | m1 c "Key 1"; 15 | m1 c "Key ..."; 16 | m1 c "Key n"; 17 | m1 c "EOT (low-level fli protocol)"; 18 | ; 19 | c m2 "DDKeys(clientID) sent to manager2 main fli."; 20 | c m2 "EOT (low-level fli protocol)"; 21 | m2 c "DDKeysResponse(status=DRAGON_SUCCESS, keys_length=n)"; 22 | m2 c "Key 1"; 23 | m2 c "Key ..."; 24 | m2 c "Key n"; 25 | m2 c "EOT (low-level fli protocol)"; -------------------------------------------------------------------------------- /doc/devguide/images/ddict_overview.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/images/ddict_overview.png -------------------------------------------------------------------------------- /doc/devguide/images/ddict_pop.srms: -------------------------------------------------------------------------------- 1 | 8 5 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes pop on a key"; 8 | ltext@ c "The Client buffers all writes for the key value."; 9 | ltext@ c "Then it hashes the key once key writing is complete"; 10 | ltext@ c "It chooses the manager by computing the remainder of"; 11 | ltext@ c "dividing by the number of managers."; 12 | ; 13 | c m2 "DDPop(clientID, chkptID) sent to chosen manager main fli."; 14 | c m2 "Key - not a message"; 15 | c m2 "EOT (low-level fli protocol)"; 16 | m2 c "DDPopResponse(err=DRAGON_SUCCESS)"; 17 | m2 c "Value part 1"; 18 | m2 c "Value part ..."; 19 | m2 c "Value part n"; 20 | m2 c "EOT (low-level fli protocol)"; -------------------------------------------------------------------------------- /doc/devguide/images/ddict_put.srms: -------------------------------------------------------------------------------- 1 | 8 5 2 | begin components 3 | c "User Program" 4 | m1 "Manager 1" 5 | m2 "Manager 2" 6 | end; 7 | ltext@ c "Client program invokes put on a key/value pair"; 8 | ltext@ c "The Client buffers all writes for the key value."; 9 | ltext@ c "Then it hashes the key once key writing is complete"; 10 | ltext@ c "It chooses the manager by computing the remainder of"; 11 | ltext@ c "dividing by the number of managers."; 12 | ; 13 | c m2 "DDPut(clientID, chkptID) sent to manager main fli."; 14 | c m2 "Key value is written on one write - not a message"; 15 | c m2 "Value part 1"; 16 | c m2 "Value part ..."; 17 | c m2 "Value part n"; 18 | c m2 "EOT (low-level fli 
protocol)"; 19 | m2 c "DDPutResponse(status=DRAGON_SUCCESS) sent to client's buffered fli"; 20 | -------------------------------------------------------------------------------- /doc/devguide/images/dragon_domain_model.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | title "Dragon Domain Model" 4 | 5 | skinparam componentStyle uml2 6 | skinparam shadowing false 7 | skinparam monochrome true 8 | 9 | skinparam linetype ortho 10 | ' skinparam linetype polyline 11 | ' skinparam nodesep 30 12 | ' skinparam ranksep 40 13 | 14 | [Distributed System] o-- "1..*" [Node] 15 | [Federated Systems] o-- "1..*" [Distributed System] 16 | [System Resource] --o " 1..1" [Node] 17 | [Dragon Object] --> "1..*" [System Resource] : abstracts 18 | 19 | [Managed Object] --|> [Dragon Object] 20 | [Unmanaged Object] --|> [Dragon Object] 21 | [Refcounted Object] --|> [Managed Object] 22 | 23 | [Object Name] --> [Managed Object] : uniquely \nidentifies 24 | [Object UID] --> [Managed Object] : uniquely \nidentifies 25 | [Serialized Descriptor] --> [Managed Object] : uniquely \nidentifies 26 | [Serialized Descriptor] --> [Unmanaged Object] : uniquely \nidentifies 27 | [Directed Graph] --> [Workflow] : abstracts 28 | [Directed Graph] --|> [Dragon Object] 29 | [Workflow] --> "1..1" [Federated System] : runs on 30 | 31 | 'in case we want to hide components 32 | hide $hidden 33 | @enduml -------------------------------------------------------------------------------- /doc/devguide/images/manager.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/images/manager.png -------------------------------------------------------------------------------- /doc/devguide/images/working_set.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/images/working_set.png -------------------------------------------------------------------------------- /doc/devguide/runtime/images/deployment_single_node.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | skinparam componentStyle uml2 4 | skinparam shadowing false 5 | skinparam monochrome true 6 | skinparam linetype polyline 7 | 8 | 9 | node "Primary Compute Node" { 10 | artifact "Launcher" as LA 11 | artifact "Global Services" as GS 12 | artifact "Shepherd" as SH 13 | ' artifact "Transport Service" as TS 14 | artifact "User Program" as UP 15 | } 16 | 17 | LA -- SH : Launcher Channel 18 | LA -- GS: Launcher Backend Channel 19 | SH -- UP: stdin/stdout/stderr 20 | SH -- GS: Global Services Channel 21 | 22 | 23 | @enduml -------------------------------------------------------------------------------- /doc/devguide/runtime/images/global_services.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | skinparam componentStyle uml2 3 | skinparam shadowing false 4 | skinparam monochrome true 5 | skinparam linetype polyline 6 | 7 | package "Global Services" { 8 | 9 | [Input Message Handler] as IMH 10 | [Command Processor] as CP 11 | [stdin Forwarder] as FWD 12 | [std* Monitor] as MON 13 | [Output Message Handler] as OUT 14 | [Death Watcher] as WATCH 15 | 16 | IMH --> CP 17 | IMH --> FWD 18 | FWD --> OUT 19 | CP --> MON 20 | OUT <-- MON 21 | WATCH -u-> OUT 22 | 23 | } 24 | 25 | [Linux Kernel] as K 26 | interface "Linux Kernel API" as kernel_api 27 | 28 | interface "Global Services Channel" as global_services_channel 29 | interface "Shepherd Channel" as shepherd_channel 30 | interface "Launcher Backend Channel" as launcher_backend_channel 31 | 32 | K <-u- kernel_api 33 | 34 | OUT --> launcher_backend_channel 35 | OUT -d-> shepherd_channel 36 | IMH <-u- 
global_services_channel 37 | IMH --> kernel_api : halt event / normal halt 38 | WATCH -d-> kernel_api : waitpid() 39 | 40 | 41 | @enduml -------------------------------------------------------------------------------- /doc/devguide/runtime/images/gsmonitor.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/runtime/images/gsmonitor.graffle -------------------------------------------------------------------------------- /doc/devguide/runtime/images/managedservices.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/runtime/images/managedservices.graffle -------------------------------------------------------------------------------- /doc/devguide/runtime/images/processstates.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/runtime/images/processstates.graffle -------------------------------------------------------------------------------- /doc/devguide/runtime/images/singlenodeoverview.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/devguide/runtime/images/singlenodeoverview.graffle -------------------------------------------------------------------------------- /doc/devguide/runtime/index.rst: -------------------------------------------------------------------------------- 1 | .. _runtime_design: 2 | 3 | Runtime Design 4 | ++++++++++++++ 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | core.rst 10 | architecture.rst 11 | launcher.rst 12 | local_services.rst 13 | transport_agent.rst 14 | global_services.rst 15 | telemetry.rst 16 | single_node_deployment.rst 17 | multi_node_deployment.rst 18 | -------------------------------------------------------------------------------- /doc/devguide/runtime/launcher.rst: -------------------------------------------------------------------------------- 1 | .. _Launcher: 2 | 3 | Launcher 4 | ++++++++ 5 | 6 | Coming soon... -------------------------------------------------------------------------------- /doc/images/ddict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/ddict.png -------------------------------------------------------------------------------- /doc/images/ddict_128nodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/ddict_128nodes.png -------------------------------------------------------------------------------- /doc/images/ddict_128to648nodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/ddict_128to648nodes.png -------------------------------------------------------------------------------- /doc/images/dragon_api_stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/dragon_api_stack.png -------------------------------------------------------------------------------- /doc/images/dragon_arch_organization.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/dragon_arch_organization.jpg -------------------------------------------------------------------------------- /doc/images/dragon_deployment.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/dragon_deployment.jpg -------------------------------------------------------------------------------- /doc/images/dragon_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/dragon_logo.png -------------------------------------------------------------------------------- /doc/images/dragon_sw_stack.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/dragon_sw_stack.jpg -------------------------------------------------------------------------------- /doc/images/overview_queue_doc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/images/overview_queue_doc.jpg -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/old_cython/channels.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreChannelsCython: 2 | 3 | Channels 4 | +++++++++++++ 5 | 6 | This is the Dragon channels interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.channels 17 | :members: Message, ChannelSendH, ChannelRecvH, Channel, Peer2PeerReadingChannelFile, Many2ManyReadingChannelFile, Many2ManyWritingChannelFile, GatewayMessage 18 | 19 | Functions 20 | ========= 21 | 22 | .. automodule:: dragon.channels 23 | :members: register_gateways_from_env, discard_gateways 24 | 25 | Enums 26 | ===== 27 | 28 | .. automodule:: dragon.channels 29 | :members: OwnershipOnSend, LockType, EventType, FlowControl, ChannelFlags 30 | 31 | 32 | Exceptions 33 | ========== 34 | 35 | .. 
automodule:: dragon.channels 36 | :members: ChannelError, ChannelTimeout, ChannelHandleNotOpenError, ChannelSendError, ChannelSendTimeout, ChannelFull, ChannelRecvError, ChannelRecvTimeout, ChannelEmpty, ChannelBarrierBroken, ChannelBarrierReady, ChannelRemoteOperationNotSupported 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_cython/dtypes.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreDTypesCython: 2 | 3 | Data Types 4 | ++++++++++ 5 | 6 | These are the Dragon data types for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.dtypes 17 | :members: WaitMode 18 | 19 | Functions 20 | ========= 21 | 22 | .. automodule:: dragon.dtypes 23 | :members: get_rc_string, getlasterrstr 24 | 25 | .. Enums 26 | .. ===== 27 | 28 | .. .. automodule:: dragon.dtypes 29 | .. :members: 30 | 31 | 32 | .. Exceptions 33 | .. ========== 34 | 35 | .. .. automodule:: dragon.dtypes 36 | .. :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_cython/fli.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreFLICython: 2 | 3 | File Like Interface 4 | +++++++++++++++++++++++++ 5 | 6 | This is the FLI API for Python. The classes presented here are a thin wrapper of the C API. The C language description of the :ref:`DragonFileLikeInterface` 7 | provides a detailed description of the FLI code and should be consulted for a good overview of this functionality. This section provides the 8 | description of the Python interface to this C code. 9 | 10 | .. contents:: 11 | :depth: 3 12 | :local: 13 | :backlinks: entry 14 | 15 | Classes 16 | ======= 17 | 18 | ..
automodule:: dragon.fli 19 | :members: FLInterface, FLISendH, FLIRecvH, DragonFLIError, FLIEOT 20 | 21 | Exceptions 22 | ========== 23 | 24 | .. automodule:: dragon.fli 25 | :members: DragonFLIError, FLIEOT 26 | 27 | 28 | -------------------------------------------------------------------------------- /doc/old_cython/heap.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreHeapCython: 2 | 3 | Priority Heap 4 | +++++++++++++ 5 | 6 | This is the Dragon channel priority heap interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.pheap 17 | :members: PriorityHeap 18 | 19 | .. Functions 20 | .. ========= 21 | 22 | .. .. automodule:: dragon.pheap 23 | .. :members: 24 | 25 | .. Enums 26 | .. ===== 27 | 28 | .. .. automodule:: dragon.pheap 29 | .. :members: 30 | 31 | 32 | .. Exceptions 33 | .. ========== 34 | 35 | .. .. automodule:: dragon.pheap 36 | .. :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_cython/heapmanager.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreHeapManagerCython: 2 | 3 | Heap Manager 4 | ++++++++++++ 5 | 6 | This is the Dragon heap manager interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | -------------------------------------------------------------------------------- /doc/old_cython/lock.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreLockCython: 2 | 3 | Shared Lock 4 | +++++++++++ 5 | 6 | This is the Dragon shared lock interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.locks 17 | :members: DragonLock, GreedyLock, FIFOLock 18 | 19 | .. Functions 20 | .. 
========= 21 | 22 | .. .. automodule:: dragon.locks 23 | .. :members: 24 | 25 | .. Enums 26 | .. ===== 27 | 28 | .. .. automodule:: dragon.locks 29 | .. :members: 30 | 31 | 32 | .. Exceptions 33 | .. ========== 34 | 35 | .. .. automodule:: dragon.locks 36 | .. :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_cython/managed_memory.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreManagedMemoryCython: 2 | 3 | Managed Memory 4 | +++++++++++++++++++ 5 | 6 | This is the Dragon managed memory interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.managed_memory 17 | :members: MemoryPoolAttr, MemoryAlloc, MemoryAllocations, MemoryPool 18 | 19 | .. Functions 20 | .. ========= 21 | 22 | .. .. automodule:: dragon.managed_memory 23 | .. :members: 24 | 25 | Enums 26 | ===== 27 | 28 | .. automodule:: dragon.managed_memory 29 | :members: PoolType, AllocType, 30 | 31 | 32 | Exceptions 33 | ========== 34 | 35 | .. automodule:: dragon.managed_memory 36 | :members: DragonMemoryError, DragonPoolError, DragonPoolCreateFail, DragonPoolAttachFail, DragonPoolDetachFail, DragonPoolAllocationNotAvailable 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_cython/utils.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreUtilsCython: 2 | 3 | Core Utils 4 | ++++++++++ 5 | 6 | These are the Dragon core utils for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.utils 17 | :members: B64 18 | 19 | Functions 20 | ========= 21 | 22 | .. automodule:: dragon.utils 23 | :members: 24 | 25 | Enums 26 | ===== 27 | 28 | ..
automodule:: dragon.utils 29 | :members: 30 | 31 | 32 | Exceptions 33 | ========== 34 | 35 | .. automodule:: dragon.utils 36 | :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_devguide/envvars.rst: -------------------------------------------------------------------------------- 1 | Environment Variables 2 | +++++++++++++++++++++ 3 | 4 | .. This page should be an FAQ item pointing to the infrastructure reference once we have it ! 5 | 6 | The following environment variables are set for every process that is started by 7 | a call to the Dragon Client API, to Dragon Native or to Python Multiprocessing. 8 | 9 | .. autoclass:: dragon.infrastructure.parameters::LaunchParameters 10 | :no-private-members: 11 | :no-members: 12 | -------------------------------------------------------------------------------- /doc/old_devguide/images/overview.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/images/overview.graffle -------------------------------------------------------------------------------- /doc/old_devguide/infrastructure.rst: -------------------------------------------------------------------------------- 1 | Dragon Infrastructure 2 | +++++++++++++++++++++ 3 | 4 | 5 | Architecture 6 | ============ 7 | 8 | 9 | .. figure:: images/infrastructure_architecture.svg 10 | :scale: 75% 11 | :name: dragon-inf-api-architecture 12 | 13 | **Architecture of the Dragon Infrastructure API** 14 | 15 | :numref:`dragon-inf-api-architecture` shows a UML2 component diagram of the Dragon infrastructure API and its components. 16 | 17 | The infrastructure API is consumed by Dragon Services: Local Services, Global 18 | Services, Launcher Backend, and the Transport Agents. It consists mostly of conventions, like message types and common IDs. 
19 | The API also implements a basic connection object that abstracts Channels for convenience and performance. 20 | 21 | 22 | API Reference 23 | ============== 24 | 25 | Here is the :ref:`InfrastructureAPI` API. -------------------------------------------------------------------------------- /doc/old_devguide/internal/communication/channels/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/communication/communication.rst: -------------------------------------------------------------------------------- 1 | .. _CommunicationComponents: 2 | 3 | Communication Components 4 | ++++++++++++++++++++++++ 5 | 6 | .. toctree:: 7 | 8 | channels/channels.rst 9 | channels/gateway_channels.rst 10 | channels/priority_heap.rst 11 | mrnet.rst 12 | channels/channels_performance.rst 13 | 14 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/components.rst: -------------------------------------------------------------------------------- 1 | .. _Components: 2 | 3 | Low-level Components 4 | ++++++++++++++++++++ 5 | 6 | .. toctree:: 7 | 8 | managed_memory/managed_memory.rst 9 | managed_memory/cy_managedmemory.rst 10 | unordered_map.rst 11 | broadcast.rst 12 | 13 | .. The scalable_locks.rst contains duplicate definitions and includes 14 | .. the cy_scalable_locks.rst which also contains duplicate definitions. 15 | ..
scalable_locks/scalable_locks.rst 16 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/images/bcast.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/images/bcast.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/heapallocations.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/managed_memory/images/heapallocations.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/heapfree1.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/managed_memory/images/heapfree1.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/heapfree2.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/managed_memory/images/heapfree2.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/heapfree3.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/managed_memory/images/heapfree3.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/heapfree4.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/managed_memory/images/heapfree4.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/managed_memory/images/heapfree5.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/components/managed_memory/images/heapfree5.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/scalable_locks/images/scalable_locks.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | skinparam componentStyle uml2 4 | skinparam shadowing false 5 | skinparam monochrome true 6 | 7 | ' skinparam linetype ortho 8 | skinparam linetype polyline 9 | ' skinparam nodesep 10 10 | ' skinparam ranksep 20 11 | 12 | 13 | package "Scalable Locks" as SL { 14 | [FIFO Lock] as FIFOLOCK 15 | [Greedy Lock] as GRLOCK 16 | [Readers 
Writer Lock] as RWLOCK 17 | 18 | interface "FIFO lock API" as fifo_api 19 | interface "Greedy lock API" as greedy_api 20 | interface "RWLock API " as rw_api 21 | 22 | FIFOLOCK <-- fifo_api 23 | GRLOCK <-- greedy_api 24 | RWLOCK <-- rw_api 25 | } 26 | 27 | interface "Scalable Locks API" as scalable_locks_api 28 | 29 | fifo_api <-- scalable_locks_api 30 | greedy_api <-- scalable_locks_api 31 | rw_api <-- scalable_locks_api 32 | 33 | @enduml 34 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/components/unordered_map.rst: -------------------------------------------------------------------------------- 1 | .. _UnorderedMap: 2 | 3 | Unordered Map 4 | +++++++++++++ 5 | 6 | A hash table on shared memory. -------------------------------------------------------------------------------- /doc/old_devguide/internal/infrastructure/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/infrastructure/images/multinodeoverview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/infrastructure/images/multinodeoverview.pdf -------------------------------------------------------------------------------- /doc/old_devguide/internal/infrastructure/images/overlay_network_fanout.puml: -------------------------------------------------------------------------------- 1 | @startmindmap 2 | 3 | * Front End 4 | ** Node 0 5 | *** Node 32 6 | **** 1056 7 | **** ... 8 | **** 1087 9 | *** ... 10 | **** ... 11 | *** Node 63 12 | **** Node 2048 13 | **** ... 14 | **** Node 2079 15 | ** ... 16 | *** ... 17 | **** ... 18 | ** Node 8 19 | *** Node 288 20 | **** Node 9248 21 | **** ... 22 | **** Node 9279 23 | *** ... 
24 | **** ... 25 | *** Node 311 26 | **** Node 9984 27 | **** ... 28 | **** Node 9999 29 | *** ... 30 | *** Node 319 31 | ** ... 32 | *** ... 33 | ** Node 31 34 | *** Node 1024 35 | *** ... 36 | *** 1055 37 | 38 | @endmindmap -------------------------------------------------------------------------------- /doc/old_devguide/internal/infrastructure/infrastructure.rst: -------------------------------------------------------------------------------- 1 | .. _Infrastructure: 2 | 3 | Infrastructure 4 | ++++++++++++++ 5 | 6 | 7 | .. toctree:: 8 | 9 | architecture.rst 10 | messages_api.rst 11 | processes.rst 12 | conventional_ids.rst 13 | single_node_deployment.rst 14 | multi_node_deployment.rst 15 | bootstrapping.rst 16 | logging.rst 17 | policy.rst 18 | overlay_network.rst -------------------------------------------------------------------------------- /doc/old_devguide/internal/infrastructure/overlay_network.rst: -------------------------------------------------------------------------------- 1 | .. _Overlay_Network: 2 | 3 | Dragon Overlay Network 4 | ++++++++++++++++++++++ 5 | 6 | In the multi-node case, Dragon establishes an Overlay Network to communicate between the Dragon Launcher FrontEnd and 7 | the Dragon Launcher Backend processes running on each backend compute node. To establish this Overlay Network, Dragon uses 8 | the Dragon TCP Network Agent in a Fanout Tree. The Fanout Tree uses a default branching factor of 32 nodes. The hierarchy 9 | of the tree follows the order of nodes in the node list with the root being the front end and children being located at 10 | bn+1 to bn+b where b is the branching factor (32) of the tree. This enables Dragon to scale and communicate with a large 11 | number of nodes efficiently. 12 | 13 | :numref:`overlay-network-fanout` shows an example of a 10,000 node Dragon Overlay Network Fanout. 14 | 15 | .. 
figure:: images/overlay_network_fanout.svg 16 | :name: overlay-network-fanout 17 | 18 | **Example 10,000 node Overlay Network fanout** -------------------------------------------------------------------------------- /doc/old_devguide/internal/internal.rst: -------------------------------------------------------------------------------- 1 | Internal Documentation (TO BE REVIEWED) 2 | +++++++++++++++++++++++++++++++++++++++ 3 | 4 | This area documents internal and developmental components and interfaces. They may be part of the open source 5 | or of the proprietary implementation but either way they are not part of the Dragon public API. 6 | 7 | If they are involved in any upstreamed code they should be provided not as user documentation but instead as 8 | documentation for maintainers. 9 | 10 | .. toctree:: 11 | :caption: Contents 12 | :maxdepth: 2 13 | 14 | introduction.rst 15 | using_dragon/using_dragon.rst 16 | infrastructure/infrastructure.rst 17 | services/services.rst 18 | communication/communication.rst 19 | components/components.rst 20 | ref/ref.rst 21 | testing/testing.rst -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/Cython/dtypes.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreDTypesCython: 2 | 3 | Data Types 4 | ++++++++++ 5 | 6 | These are the Dragon data types for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.dtypes 17 | :members: WaitMode 18 | 19 | Functions 20 | ========= 21 | 22 | .. automodule:: dragon.dtypes 23 | :members: get_rc_string, getlasterrstr 24 | 25 | .. Enums 26 | .. ===== 27 | 28 | .. .. automodule:: dragon.dtypes 29 | .. :members: 30 | 31 | 32 | .. Exceptions 33 | .. ========== 34 | 35 | .. .. automodule:: dragon.dtypes 36 | ..
:members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/Cython/heap.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreHeapCython: 2 | 3 | Priority Heap 4 | +++++++++++++ 5 | 6 | This is the Dragon channel priority heap interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.pheap 17 | :members: PriorityHeap 18 | 19 | .. Functions 20 | .. ========= 21 | 22 | .. .. automodule:: dragon.pheap 23 | .. :members: 24 | 25 | .. Enums 26 | .. ===== 27 | 28 | .. .. automodule:: dragon.pheap 29 | .. :members: 30 | 31 | 32 | .. Exceptions 33 | .. ========== 34 | 35 | .. .. automodule:: dragon.pheap 36 | .. :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/Cython/lock.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreLockCython: 2 | 3 | Shared Lock 4 | +++++++++++ 5 | 6 | This is the Dragon shared lock interface for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.locks 17 | :members: DragonLock, GreedyLock, FIFOLock 18 | 19 | .. Functions 20 | .. ========= 21 | 22 | .. .. automodule:: dragon.locks 23 | .. :members: 24 | 25 | .. Enums 26 | .. ===== 27 | 28 | .. .. automodule:: dragon.locks 29 | .. :members: 30 | 31 | 32 | .. Exceptions 33 | .. ========== 34 | 35 | .. .. automodule:: dragon.locks 36 | .. :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/Cython/utils.rst: -------------------------------------------------------------------------------- 1 | .. 
_DragonCoreUtilsCython: 2 | 3 | Core Utils 4 | ++++++++++ 5 | 6 | These are the Dragon core utils for Python 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | Classes 14 | ======= 15 | 16 | .. automodule:: dragon.utils 17 | :members: B64 18 | 19 | Functions 20 | ========= 21 | 22 | .. automodule:: dragon.utils 23 | :members: 24 | 25 | Enums 26 | ===== 27 | 28 | .. automodule:: dragon.utils 29 | :members: 30 | 31 | 32 | Exceptions 33 | ========== 34 | 35 | .. automodule:: dragon.utils 36 | :members: 37 | 38 | 39 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/bitset.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreBitset: 2 | 3 | Bitset 4 | ++++++ 5 | 6 | .. doxygenfile:: bitset.c -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/hashtable.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreHashtable: 2 | 3 | Hashtable 4 | +++++++++ 5 | 6 | .. doxygenfile:: hashtable.c -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/heap_manager.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreHeapManager: 2 | 3 | 4 | Heap Manager 5 | ++++++++++++ 6 | 7 | .. doxygenfile:: heap_manager.c -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/priority_heap.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCorePriorityHeap: 2 | 3 | Priority Heap 4 | +++++++++++++ 5 | 6 | ..
doxygenfile:: priority_heap.c -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/shared_lock.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreSharedLock: 2 | 3 | Shared Lock 4 | +++++++++++ 5 | 6 | .. doxygenfile:: shared_lock.c -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/ulist.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreUList: 2 | 3 | Unordered List 4 | ++++++++++++++ 5 | 6 | .. doxygenfile:: ulist.cpp -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/umap.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreUMap: 2 | 3 | 4 | Unordered Map 5 | +++++++++++++ 6 | 7 | .. doxygenfile:: umap.cpp -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/core/c/utils.rst: -------------------------------------------------------------------------------- 1 | .. _DragonCoreUtils: 2 | 3 | 4 | Utilities 5 | +++++++++ 6 | 7 | .. doxygenfile:: utils.c -------------------------------------------------------------------------------- /doc/old_devguide/internal/ref/ref.rst: -------------------------------------------------------------------------------- 1 | Internal API Reference 2 | +++++++++++++++++++++++++ 3 | 4 | 5 | .. 
toctree:: 6 | :maxdepth: 2 7 | 8 | core/index.rst -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/MultiNodeOverview.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/MultiNodeOverview.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/PoolOverview.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/PoolOverview.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/jupytermode.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/jupytermode.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/launcher_single_node.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | skinparam componentStyle uml2 3 | skinparam shadowing false 4 | skinparam monochrome true 5 | skinparam linetype polyline 6 | 7 | package "Launcher on Single Node Deployment" { 8 | 9 | [Launcher Backend] as LB 10 | [Launcher Frontend] as LF 11 | 12 | interface "stdout/stderr" as stdout 13 | interface "stdin" as 
stdin 14 | 15 | LF -d-> stdin 16 | LF <-u- stdout 17 | LB <-d- stdin 18 | LB -u-> stdout 19 | 20 | } 21 | 22 | interface "POSIX MQ" as posix_message_queue 23 | interface "Launcher Channel" as launcher_channel 24 | interface "Launcher Backend Channel" as bela_channel 25 | interface "Shepherd Channel" as shepherd_channel 26 | interface "Global Services Channel" as global_services_channel 27 | 28 | LB <-r-> posix_message_queue 29 | LB -d-> shepherd_channel 30 | LB -d-> global_services_channel 31 | LB <-u- bela_channel 32 | LB <-u- launcher_channel 33 | 34 | @enduml -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/launchercomponents.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/launchercomponents.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/launcherstates.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/launcherstates.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/servermode.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/servermode.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/shepherdstructure.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/shepherdstructure.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/images/singlenodelauncher.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/images/singlenodelauncher.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/services.rst: -------------------------------------------------------------------------------- 1 | .. _Services: 2 | 3 | Services 4 | ++++++++ 5 | 6 | .. toctree:: 7 | 8 | launcher.rst 9 | global_services.rst 10 | local_services.rst 11 | transport_agent/transport_agent.rst 12 | telemetry/telemetry.rst 13 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/RemoteSendRecvStructure.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/RemoteSendRecvStructure.graffle -------------------------------------------------------------------------------- 
/doc/old_devguide/internal/services/transport_agent/images/TSTSComponents.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/TSTSComponents.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/channels.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/channels.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/newchannelops.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/newchannelops.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/origchannelops.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/origchannelops.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/overview.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/overview.graffle 
-------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/images/overview2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/services/transport_agent/images/overview2.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.agent.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.agent 2 | ========================== 3 | 4 | .. automodule:: dragon.transport.tcp.agent 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | .. rubric:: Classes 18 | 19 | .. autosummary:: 20 | 21 | Agent 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.client.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.client 2 | =========================== 3 | 4 | .. automodule:: dragon.transport.tcp.client 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | .. rubric:: Functions 14 | 15 | .. autosummary:: 16 | 17 | create_request 18 | 19 | 20 | 21 | 22 | 23 | .. rubric:: Classes 24 | 25 | .. autosummary:: 26 | 27 | Client 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.errno.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.errno 2 | ========================== 3 | 4 | .. 
automodule:: dragon.transport.tcp.errno 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | .. rubric:: Functions 14 | 15 | .. autosummary:: 16 | 17 | get_errno 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.io.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.io 2 | ======================= 3 | 4 | .. automodule:: dragon.transport.tcp.io 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | .. rubric:: Classes 18 | 19 | .. autosummary:: 20 | 21 | CodableIO 22 | EnumIO 23 | FixedBytesIO 24 | IPAddressIO 25 | IPv4AddressIO 26 | IPv6AddressIO 27 | StructIO 28 | UUIDBytesIO 29 | VariableBytesIO 30 | VariableTextIO 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp 2 | ==================== 3 | 4 | .. automodule:: dragon.transport.tcp 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | .. rubric:: Modules 26 | 27 | .. autosummary:: 28 | :toctree: 29 | :recursive: 30 | 31 | agent 32 | client 33 | errno 34 | io 35 | messages 36 | server 37 | task 38 | transport 39 | util 40 | 41 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.server.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.server 2 | =========================== 3 | 4 | .. 
automodule:: dragon.transport.tcp.server 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | .. rubric:: Classes 18 | 19 | .. autosummary:: 20 | 21 | Server 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.task.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.task 2 | ========================= 3 | 4 | .. automodule:: dragon.transport.tcp.task 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | .. rubric:: Functions 14 | 15 | .. autosummary:: 16 | 17 | cancel_all_tasks 18 | run_forever 19 | 20 | 21 | 22 | 23 | 24 | .. rubric:: Classes 25 | 26 | .. autosummary:: 27 | 28 | TaskMixin 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.transport.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.transport 2 | ============================== 3 | 4 | .. automodule:: dragon.transport.tcp.transport 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | .. rubric:: Functions 14 | 15 | .. autosummary:: 16 | 17 | close_writer 18 | create_pipe_connections 19 | create_pipe_streams 20 | create_streams 21 | writer_addrs 22 | 23 | 24 | 25 | 26 | 27 | .. rubric:: Classes 28 | 29 | .. autosummary:: 30 | 31 | Address 32 | StreamTransport 33 | Transport 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/dragon.transport.tcp.util.rst: -------------------------------------------------------------------------------- 1 | dragon.transport.tcp.util 2 | ========================= 3 | 4 | .. 
automodule:: dragon.transport.tcp.util 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | .. rubric:: Functions 14 | 15 | .. autosummary:: 16 | 17 | attach_channel 18 | create_msg 19 | mem_descr_msg 20 | mem_pool_msg 21 | open_handle 22 | poll_channel 23 | recv_msg 24 | seconds_remaining 25 | send_msg 26 | unget_nowait 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/services/transport_agent/tcp/index.rst: -------------------------------------------------------------------------------- 1 | .. _TCPTransport: 2 | 3 | TCP Transport 4 | +++++++++++++ 5 | 6 | 7 | Reference 8 | ========= 9 | 10 | .. currentmodule:: dragon.transport 11 | 12 | .. autosummary:: 13 | :toctree: 14 | :recursive: 15 | 16 | tcp 17 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/testing/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/testing/images/shepherdconnections.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/old_devguide/internal/testing/images/shepherdconnections.graffle -------------------------------------------------------------------------------- /doc/old_devguide/internal/using_dragon/dragon_native.rst: -------------------------------------------------------------------------------- 1 | .. _DragonWithDragonNative: 2 | 3 | Dragon with Dragon Native API 4 | +++++++++++++++++++++++++++++ 5 | 6 | Architecture 7 | ------------ 8 | 9 | .. 
figure:: images/dragon_with_native.svg 10 | :name: dragon-with-native 11 | 12 | **Dragon with native API Architecture** 13 | 14 | **FIXME: As we complete the dragon native implementation, this section could include basic usage docs and 15 | examples showcasing common usage patterns.** 16 | 17 | Examples 18 | ======== 19 | 20 | Pool Single Node 21 | ---------------- 22 | 23 | Pool Multi Node 24 | ---------------- -------------------------------------------------------------------------------- /doc/old_devguide/internal/using_dragon/images/.gitignore: -------------------------------------------------------------------------------- 1 | - *.png 2 | -------------------------------------------------------------------------------- /doc/old_devguide/internal/using_dragon/images/dragon_with_native.puml: -------------------------------------------------------------------------------- 1 | ' The Python Multiprocessing use case. 2 | ' 3 | 4 | 5 | @startuml 6 | 7 | ' title "Dragon Distributed Architecture with Python Multiprocessing" 8 | 9 | skinparam componentStyle uml2 10 | skinparam shadowing false 11 | skinparam monochrome true 12 | 13 | ' skinparam linetype ortho 14 | skinparam linetype polyline 15 | ' skinparam nodesep 10 16 | ' skinparam ranksep 20 17 | 18 | 19 | interface "stdin/stdout/stderr" as posix_std 20 | 21 | component "Infrastructure" as INF { 22 | portin "Messages API" as imsg_api 23 | [Messages] as IMSG 24 | 25 | IMSG -u-> imsg_api 26 | } 27 | 28 | [User Program] as User_Program 29 | [Dragon Native] as DN 30 | [Dragon GS Client] as DCAPI 31 | User_Program --> DN 32 | User_Program <--> posix_std 33 | DN --> DCAPI 34 | 35 | 36 | [Shepherd] as SH 37 | [Global Services] as GS 38 | [Launcher Backend] as BELA 39 | 40 | posix_std <--> SH 41 | DCAPI --> imsg_api 42 | 43 | SH <--> imsg_api 44 | GS <--> imsg_api 45 | BELA <--> imsg_api 46 | 47 | 'in case we want to hide components 48 | hide $hidden 49 | @enduml 
-------------------------------------------------------------------------------- /doc/old_devguide/internal/using_dragon/python_multiprocessing.rst: -------------------------------------------------------------------------------- 1 | .. _DragonWithPythonMultiprocessing: 2 | 3 | Dragon with Python Multiprocessing 4 | ++++++++++++++++++++++++++++++++++ 5 | 6 | **FIXME: As we complete multiprocessing implementation, this section could include basic usage docs and 7 | examples showcasing common usage patterns.** 8 | 9 | Architecture 10 | ============ 11 | 12 | .. figure:: images/dragon_with_python_multiprocessing.svg 13 | :name: dragon-with-mp 14 | 15 | **Dragon with Python Multiprocessing Architecture** 16 | 17 | Examples 18 | ======== 19 | 20 | Pool Single Node 21 | ---------------- 22 | 23 | Pool Multi Node 24 | ---------------- -------------------------------------------------------------------------------- /doc/ref.rst: -------------------------------------------------------------------------------- 1 | .. _DragonAPI: 2 | 3 | API Reference 4 | +++++++++++++ 5 | 6 | The best place to begin with Dragon is with :external+python:doc:`Python multiprocessing `. 7 | Users move to the Dragon API directly as they find the need for more control, such as explicitly placing 8 | processes or objects on CPU and GPU nodes, or they want to use other features in Dragon like the in-memory distributed 9 | dictionary. 10 | 11 | 12 | User API 13 | ======== 14 | 15 | .. toctree:: 16 | :maxdepth: 2 17 | 18 | ref/mpbridge/multiprocessing.rst 19 | ref/data/index.rst 20 | ref/telemetry/index.rst 21 | ref/ai/index.rst 22 | ref/workflows/index.rst 23 | ref/native/index.rst 24 | ref/policy.rst 25 | 26 | 27 | Low-level API 28 | ============= 29 | 30 | .. 
toctree:: 31 | :maxdepth: 2 32 | 33 | ref/inf/index.rst 34 | ref/client/index.rst 35 | ref/core/index.rst 36 | -------------------------------------------------------------------------------- /doc/ref/ai/index.rst: -------------------------------------------------------------------------------- 1 | .. _AIAPI: 2 | 3 | AI 4 | ++ 5 | 6 | These interfaces enable integrations into key AI packages, such as `PyTorch `__. A Common use is 7 | for enhanced data loading. 8 | 9 | 10 | Python Reference 11 | ================ 12 | 13 | .. currentmodule:: dragon.ai.torch 14 | 15 | .. autosummary:: 16 | :toctree: 17 | :recursive: 18 | 19 | DragonDataset -------------------------------------------------------------------------------- /doc/ref/client/images/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PATH=$PATH:/home/users/klee/home/SRMStoFigs 3 | ls -1 *.srms | xargs -n 1 srms2png 4 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_ChannelDescriptor_attach.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.attach()"; 7 | UP GS "CHANNEL_ATTACH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success: attached"; 10 | GS UP "CHANNEL_IS_ATTACHED"+ 11 | GS UP "c_uid"; 12 | ltext@ GS "fail: no such channel"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_ChannelDescriptor_detach.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.detach()"; 7 | UP GS "CHANNEL_DETACH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success: detached"; 10 | GS 
UP "CHANNEL_IS_DETACHED"+ 11 | GS UP "c_uid"; 12 | ltext@ GS "fail: no such channel"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_ChannelDescriptor_get_recvh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.get_recvh()"; 7 | UP GS "CHANNEL_GET_RECVH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_RECVH"+ 11 | GS UP "RecvChannel"; 12 | ltext@ GS "fail: recv handle unavailable"; 13 | GS UP "FAIL_CHANNEL_RECVH_UNAVAIL"+ 14 | GS UP "None"; 15 | ltext@ GS "fail: no such channel"; 16 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | GS UP "None"; 18 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_ChannelDescriptor_get_sendh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.get_sendh()"; 7 | UP GS "CHANNEL_GET_SENDH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_SENDH"+ 11 | GS UP "SendChannel"; 12 | ltext@ GS "fail: send handle unavailable"; 13 | GS UP "FAIL_CHANNEL_SENDH_UNAVAIL"+ 14 | GS UP "None"; 15 | ltext@ GS "fail: no such channel"; 16 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | GS UP "None"; 18 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_ChannelDescriptor_refresh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.refresh()"; 7 | UP GS "CHANNEL_REFRESH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success"; 10 | GS UP 
"CHANNEL_DESCRIPTOR"+ 11 | GS UP "ChannelDescriptor"; 12 | ltext@ GS "fail: no such channel"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_RecvChannel_close.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "RecvChannel.close()"; 7 | UP LS "CHANNEL_CLOSE_RECV"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS "success"; 10 | LS UP "CHANNEL_RECV_CLOSED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already closed"; 13 | LS UP "FAIL_CHANNEL_CLOSE_RECV_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_RecvChannel_open.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "RecvChannel.open()"; 7 | UP LS "CHANNEL_OPEN_RECV"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS "success"; 10 | LS UP "CHANNEL_RECV_OPENED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already opened"; 13 | LS UP "FAIL_CHANNEL_OPEN_RECV_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | 19 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_SendChannel_close.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "SendChannel.close()"; 7 | UP LS "CHANNEL_CLOSE_SEND"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS "success"; 10 | LS UP "CHANNEL_SEND_CLOSED"+ 11 | LS UP "None"; 12 | ltext@ LS 
"fail: already closed"; 13 | LS UP "FAIL_CHANNEL_CLOSE_SEND_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_SendChannel_open.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "SendChannel.open()"; 7 | UP LS "CHANNEL_OPEN_SEND"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS "success"; 10 | LS UP "CHANNEL_SEND_OPENED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already opened"; 13 | LS UP "FAIL_CHANNEL_OPEN_SEND_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | 19 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_create.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "Channel()"; 7 | UP GS "CHANNEL_CREATE"+ 8 | UP GS "p_uid, channel props, [user_name]"; 9 | ltext@ GS "success, channel created"; 10 | ltext@ GS "new channel n_cid and names assigned"; 11 | GS UP "CHANNEL_DESCRIPTOR"+ 12 | GS UP "ChannelDescriptor"; 13 | ltext@ GS "fail: channel couldn't be made"; 14 | GS UP "FAIL_CHANNEL_CREATE"+ 15 | GS UP "error info"; 16 | ltext@ GS "fail: user name in use"; 17 | GS UP "FAIL_CHANNEL_ALREADY"+ 18 | GS UP "ChannelDescriptor"; 19 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_destroy.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "channel.destroy()"; 7 | UP GS "CHANNEL_DESTROY"+ 8 
| UP GS "p_uid, target(c_uid| user_name)"; 9 | ltext@ GS "success: channel removed"; 10 | GS UP "CHANNEL_IS_DESTROYED"+ 11 | GS UP "None"; 12 | ltext@ GS "fail: channel is busy"; 13 | GS UP "FAIL_CHANNEL_BUSY"+ 14 | GS UP "error info"; 15 | ltext@ GS "fail: channel unknown"; 16 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | GS UP "None"; 18 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_list.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "channel.list()"; 7 | UP GS "CHANNEL_LIST"+ 8 | UP GS "p_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_LIST_RESP"+ 11 | GS UP "[(c_uid, name)]"; 12 | 13 | -------------------------------------------------------------------------------- /doc/ref/client/images/channel_query.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "channel.query()"; 7 | UP GS "CHANNEL_QUERY"+ 8 | UP GS "p_uid, target(c_uid | user_name)"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_DESCRIPTOR"+ 11 | GS UP "ChannelDescriptor"; 12 | ltext@ GS "fail: names not found"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc/ref/client/images/process_ProcessInfo_refresh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.ProcessInfo.refresh()"; 7 | UP GS "PROCESS_QUERY"+ 8 | UP GS "n_uid, target(n_uid | user_name)"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO"+ 11 | GS UP "ProcessInfo"; 12 | ltext@ GS "fail: names not found"; 13 | ltext@ GS "shouldn't ever fail - process records persist"; 14 | GS UP 
"FAIL_PROCESS_UNKNOWN"+ 15 | GS UP "None"; 16 | -------------------------------------------------------------------------------- /doc/ref/client/images/process_create.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.create()"; 7 | UP GS "PROCESS_CREATE"+ 8 | UP GS "p_uid, [user_name] exe, args, env"; 9 | ltext@ GS "success, process created"; 10 | ltext@ GS "new process p_uid and names assigned"; 11 | GS UP "PROCESS_INFO"+ 12 | GS UP "ProcessInfo"; 13 | ltext@ GS "fail: process couldn't start"; 14 | GS UP "FAIL_PROCESS_CREATE"+ 15 | GS UP "error info"; 16 | ltext@ GS "fail: user name in use"; 17 | GS UP "FAIL_PROCESS_ALREADY"+ 18 | GS UP "ProcessInfo"; 19 | -------------------------------------------------------------------------------- /doc/ref/client/images/process_join.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.join()"; 7 | rtext@ UP "block: server does timeout"; 8 | UP GS "PROCESS_JOIN"+ 9 | UP GS "p_uid, target(p_uid | user_name)"; 10 | ltext@ GS "success, process ended"; 11 | GS UP "PROCESS_IS_JOINED"+ 12 | GS UP "return code"; 13 | ltext@ GS "timer expired"; 14 | GS UP "FAIL_PROCESS_JOIN_TIMEOUT"+ 15 | GS UP "None"; 16 | ltext@ GS "fail: names not found"; 17 | GS UP "FAIL_PROCESS_UNKNOWN"+ 18 | GS UP "None"; 19 | -------------------------------------------------------------------------------- /doc/ref/client/images/process_kill.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.kill()"; 7 | UP GS "PROCESS_KILL"+ 8 | UP GS "p_uid, target(p_uid | user_name), sig"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO"+ 11 | GS UP 
"ProcessInfo"; 12 | ltext@ GS "fail: names not found"; 13 | GS UP "FAIL_PROCESS_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc/ref/client/images/process_list.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.list()"; 7 | UP GS "PROCESS_LIST"+ 8 | UP GS "p_uid"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO_LIST"+ 11 | GS UP "[(p_uid, name)]"; 12 | -------------------------------------------------------------------------------- /doc/ref/client/images/process_query.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.query()"; 7 | UP GS "PROCESS_QUERY"+ 8 | UP GS "p_uid, target(p_uid | user_name)"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO"+ 11 | GS UP "ProcessInfo"; 12 | ltext@ GS "fail: names not found"; 13 | GS UP "FAIL_PROCESS_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc/ref/client/index.rst: -------------------------------------------------------------------------------- 1 | .. _ClientAPI: 2 | 3 | Global Services Client 4 | ++++++++++++++++++++++ 5 | 6 | The Global Services client API is for managing the life-cycle of managed objects and extended capabilities. Most other 7 | high-level interfaces use this API for managing objects that require visibility from other processes. 8 | 9 | Python Reference 10 | ================ 11 | 12 | .. currentmodule:: dragon.globalservices 13 | 14 | .. 
autosummary:: 15 | :toctree: 16 | :recursive: 17 | 18 | api_setup 19 | pool 20 | process 21 | channel 22 | node 23 | group 24 | -------------------------------------------------------------------------------- /doc/ref/core/c/channelsets.rst: -------------------------------------------------------------------------------- 1 | .. _ChannelSets: 2 | 3 | ChannelSets 4 | =============== 5 | 6 | 7 | .. _ChannelSetAPI: 8 | 9 | 10 | API 11 | ''''''' 12 | 13 | These are the user-facing API calls for ChannelSet objects. 14 | 15 | .. doxygenfile:: channelsets.c -------------------------------------------------------------------------------- /doc/ref/core/c/fli.rst: -------------------------------------------------------------------------------- 1 | .. _DragonFileLikeInterface: 2 | 3 | File Like Interface 4 | ==================== 5 | 6 | Constants 7 | '''''''''''''''''''''''''''' 8 | 9 | .. doxygengroup:: fli_consts 10 | :content-only: 11 | :members: 12 | 13 | Structures 14 | '''''''''''''''' 15 | 16 | .. doxygengroup:: fli_structs 17 | :content-only: 18 | :members: 19 | 20 | FLI Lifecycle Management 21 | ''''''''''''''''''''''''''''' 22 | 23 | .. doxygengroup:: fli_lifecycle 24 | :content-only: 25 | :members: 26 | 27 | FLI Send/Recv Handle Management 28 | '''''''''''''''''''''''''''''''''''' 29 | 30 | .. doxygengroup:: fli_handles 31 | :content-only: 32 | :members: 33 | 34 | FLI Send/Recv Functions 35 | ''''''''''''''''''''''''''''' 36 | 37 | .. 
doxygengroup:: fli_sendrecv 38 | :content-only: 39 | :members: 40 | -------------------------------------------------------------------------------- /doc/ref/core/c/image/bcast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/doc/ref/core/c/image/bcast.png -------------------------------------------------------------------------------- /doc/ref/core/index.rst: -------------------------------------------------------------------------------- 1 | .. _CoreAPI: 2 | 3 | Core 4 | ++++ 5 | 6 | The Core API is for fundamental memory management and communication primitives on which all of Dragon is built. 7 | 8 | .. _core_python_api: 9 | 10 | Python Reference 11 | ================ 12 | 13 | .. currentmodule:: dragon 14 | 15 | .. autosummary:: 16 | :toctree: 17 | :recursive: 18 | 19 | dtypes 20 | managed_memory 21 | channels 22 | fli 23 | 24 | Low-level Python Reference 25 | ========================== 26 | 27 | .. currentmodule:: dragon 28 | 29 | .. autosummary:: 30 | :toctree: 31 | :recursive: 32 | 33 | pheap 34 | locks 35 | utils 36 | pmod 37 | 38 | .. _core_c_api: 39 | 40 | C Reference 41 | =========== 42 | 43 | .. toctree:: 44 | :maxdepth: 1 45 | 46 | c/managed_memory.rst 47 | c/channels.rst 48 | c/channelsets.rst 49 | c/fli.rst -------------------------------------------------------------------------------- /doc/ref/data/index.rst: -------------------------------------------------------------------------------- 1 | .. _DataAPI: 2 | 3 | Data 4 | ++++ 5 | 6 | Dragon has APIs for managing data in a scalable fashion. A prominent member of 7 | these APIs is the distributed dictionary. Descriptions and APIs are grouped 8 | together for each of the supported Dragon Data Types. 9 | 10 | 11 | Python Reference 12 | ================ 13 | 14 | .. currentmodule:: dragon.data 15 | 16 | .. 
autosummary:: 17 | :toctree: 18 | :recursive: 19 | 20 | DDict 21 | zarr.Store 22 | 23 | 24 | C Reference 25 | =========== 26 | 27 | .. toctree:: 28 | :maxdepth: 3 29 | 30 | C/ddict.rst -------------------------------------------------------------------------------- /doc/ref/inf/index.rst: -------------------------------------------------------------------------------- 1 | .. _InfrastructureAPI: 2 | 3 | Infrastructure 4 | ++++++++++++++ 5 | 6 | :ref:`Services` in the Dragon runtime interact with each other using messages transported with a variety of 7 | different means (mostly :ref:`Channels`). Although there is the Client API to construct and send these 8 | messages, the messages themselves constitute the true internal interface. To that end, they are a convention. 9 | Developers should use this API to add functionality to the Dragon Services through new messages. 10 | It is not meant for users. 11 | 12 | 13 | Python Reference 14 | ================ 15 | 16 | .. currentmodule:: dragon.infrastructure 17 | 18 | .. autosummary:: 19 | :toctree: 20 | :recursive: 21 | 22 | channel_desc 23 | facts 24 | group_desc 25 | messages 26 | node_desc 27 | parameters 28 | pool_desc 29 | process_desc 30 | standalone_conn 31 | util 32 | 33 | C Reference 34 | =========== 35 | 36 | .. toctree:: 37 | :maxdepth: 1 38 | 39 | logging 40 | -------------------------------------------------------------------------------- /doc/ref/inf/logging.rst: -------------------------------------------------------------------------------- 1 | .. _DragonInfLogging: 2 | 3 | Logging 4 | +++++++ 5 | 6 | This is Dragons logging interface for the infrastructure API. 7 | 8 | .. contents:: 9 | :depth: 3 10 | :local: 11 | :backlinks: entry 12 | 13 | C Interface 14 | =========== 15 | 16 | .. doxygenfile:: logging.c 17 | 18 | 19 | Cython Interface 20 | ================ 21 | 22 | .. 
automodule:: dragon.dlogging.logger 23 | :members: DragonLogger, DragonLoggingError -------------------------------------------------------------------------------- /doc/ref/native/C++/index.rst: -------------------------------------------------------------------------------- 1 | .. _DragonNativeC++: 2 | 3 | C++ Language Bindings 4 | +++++++++++++++++++++ 5 | 6 | All C functions are also accessible when using C++. The C++ bindings provide a 7 | higher-level abstraction using class definitions that allow for the same level of 8 | cross-language compatability. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | queue.rst -------------------------------------------------------------------------------- /doc/ref/native/C++/queue.rst: -------------------------------------------------------------------------------- 1 | .. _DragonNativeQueueC++: 2 | 3 | Queue in C++ 4 | ++++++++++++ 5 | 6 | C++ Classes and Members 7 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 8 | 9 | .. doxygenclass:: DragonException 10 | :members: 11 | 12 | .. doxygenclass:: DragonManagedQueueBuf 13 | :members: 14 | 15 | .. doxygenclass:: DragonUnmanagedQueueBuf 16 | :members: -------------------------------------------------------------------------------- /doc/ref/native/C/control_structures.rst: -------------------------------------------------------------------------------- 1 | Control Structures in C 2 | +++++++++++++++++++++++ 3 | 4 | 5 | .. 6 | Within the C language, policy decisions will be specified via a `dragonPolicy` structure 7 | as given here. 8 | 9 | 10 | API Specification 11 | ================= 12 | 13 | .. doxygenenum:: dragonAffinity_t 14 | 15 | .. doxygenenum:: dragonWaitMode_t 16 | 17 | .. doxygenstruct:: dragonPolicy_t 18 | :members: 19 | 20 | -------------------------------------------------------------------------------- /doc/ref/native/C/index.rst: -------------------------------------------------------------------------------- 1 | .. 
_DragonNativeC: 2 | 3 | C Language Bindings 4 | +++++++++++++++++++ 5 | 6 | Following are language bindings for C. All these bindings are cross-language so 7 | data read or written in one language may be read in another language's equivalent 8 | object. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | control_structures.rst 14 | queue.rst 15 | 16 | -------------------------------------------------------------------------------- /doc/ref/native/Fortran/index.rst: -------------------------------------------------------------------------------- 1 | .. _DragonNativeFortran: 2 | 3 | .. See https://sphinx-fortran.readthedocs.io/en/latest/ 4 | 5 | Fortran Language Bindings 6 | +++++++++++++++++++++++++ 7 | 8 | Fortran language bindings are available to provide access in Fortran to the same 9 | Dragon communication objects available in C and C++. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | queue.rst -------------------------------------------------------------------------------- /doc/ref/native/Fortran/queue.rst: -------------------------------------------------------------------------------- 1 | .. _DragonNativeQueueFortran: 2 | 3 | Queue in Fortran 4 | ++++++++++++++++ 5 | 6 | .. f:autosrcfile:: queue.f90 -------------------------------------------------------------------------------- /doc/ref/policy.rst: -------------------------------------------------------------------------------- 1 | Resource Placement and Affinity 2 | +++++++++++++++++++++++++++++++ 3 | 4 | Python Reference 5 | ---------------- 6 | 7 | .. currentmodule:: dragon.infrastructure.policy 8 | .. autosummary:: 9 | :toctree: 10 | :recursive: 11 | 12 | Policy -------------------------------------------------------------------------------- /doc/ref/telemetry/dragon.telemetry.telemetry.rst: -------------------------------------------------------------------------------- 1 | dragon.telemetry.telemetry 2 | ========================== 3 | 4 | .. 
automodule:: dragon.telemetry.telemetry 5 | :members: 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | .. rubric:: Classes 18 | 19 | .. autosummary:: 20 | 21 | Telemetry 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /doc/ref/telemetry/index.rst: -------------------------------------------------------------------------------- 1 | .. _TelemetryAPI: 2 | 3 | Telemetry 4 | +++++++++ 5 | 6 | 7 | Python Reference 8 | ---------------- 9 | 10 | .. currentmodule:: dragon.telemetry 11 | 12 | .. autosummary:: 13 | :toctree: 14 | :recursive: 15 | 16 | telemetry 17 | analysis -------------------------------------------------------------------------------- /doc/ref/workflows/index.rst: -------------------------------------------------------------------------------- 1 | .. _WorkflowsAPI: 2 | 3 | Workflows 4 | +++++++++ 5 | 6 | Dragon can integrate into other workflow tools and enhance their scalability and performance. These APIs are 7 | integrations into other tools. These are currently works-in-progress and are useful primarily for experimentation. 8 | 9 | Python Reference 10 | ================ 11 | 12 | .. currentmodule:: dragon.workflows 13 | 14 | .. autosummary:: 15 | :toctree: 16 | :recursive: 17 | 18 | parsl_batch_executor 19 | parsl_executor 20 | parsl_mpi_app 21 | -------------------------------------------------------------------------------- /doc/uses/debugging.rst: -------------------------------------------------------------------------------- 1 | .. _debugging: 2 | 3 | Debugging 4 | +++++++++ 5 | 6 | This should go from simple techniques to common sources of issues to some advanced techniques (perhaps GDB?) 
7 | Some obvious things to include 8 | 9 | * -l DEBUG 10 | * default pool size and node memory 11 | * some tricks to isolate problems 12 | * some of the common issues -------------------------------------------------------------------------------- /doc/uses/gpus.rst: -------------------------------------------------------------------------------- 1 | .. _gpu_affinity: 2 | 3 | Controlling GPU Affinity 4 | ++++++++++++++++++++++++ 5 | 6 | This tutorial will draw from the :py:class:`~dragon.infrastructure.policy.Policy` documentation to walk through how 7 | to use :py:class:`~dragon.native.ProcessGroup` to set what GPUs to use for a given function or process as well 8 | as how to do it with multiprocessing. -------------------------------------------------------------------------------- /doc/uses/grafana.rst: -------------------------------------------------------------------------------- 1 | .. _grafana: 2 | 3 | Telemetry with Grafana 4 | ++++++++++++++++++++++ 5 | 6 | Hit on the themes of the format Telemetry puts out and how Grafana can be used to look at the data. -------------------------------------------------------------------------------- /doc/uses/workflow.rst: -------------------------------------------------------------------------------- 1 | .. _workflows: 2 | 3 | Workflows 4 | +++++++++ 5 | 6 | What should go through here is building up to an example workflow that combines a few elements. Let's say it shows 7 | running an MPI app that feeds into a ddict. From there, data is processed and summarized. -------------------------------------------------------------------------------- /doc_spec/README.md: -------------------------------------------------------------------------------- 1 | Home of Architecture Documentation 2 | =================================== 3 | 4 | This is the eventual home of a completely separate document from our API and 5 | Project documentation. 
It is separated because it may have similar function 6 | definitions to code that is actually implemented and we can't have duplicate 7 | definitions within the document since Sphinx will complain. 8 | 9 | This documentation provides the specification/architecture requirements that may 10 | not be currently implemented. It may at some point provide overlapping 11 | documentation to our actual implementation/API docs. -------------------------------------------------------------------------------- /doc_spec/images/channel_ChannelDescriptor_attach.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.attach()"; 7 | UP GS "CHANNEL_ATTACH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success: attached"; 10 | GS UP "CHANNEL_IS_ATTACHED"+ 11 | GS UP "c_uid"; 12 | ltext@ GS "fail: no such channel"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc_spec/images/channel_ChannelDescriptor_detach.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.detach()"; 7 | UP GS "CHANNEL_DETACH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success: detached"; 10 | GS UP "CHANNEL_IS_DETACHED"+ 11 | GS UP "c_uid"; 12 | ltext@ GS "fail: no such channel"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc_spec/images/channel_ChannelDescriptor_get_recvh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.get_recvh()"; 7 | UP GS "CHANNEL_GET_RECVH"+ 8 | UP GS 
"p_uid, c_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_RECVH"+ 11 | GS UP "RecvChannel"; 12 | ltext@ GS "fail: recv handle unavailable"; 13 | GS UP "FAIL_CHANNEL_RECVH_UNAVAIL"+ 14 | GS UP "None"; 15 | ltext@ GS "fail: no such channel"; 16 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | GS UP "None"; 18 | -------------------------------------------------------------------------------- /doc_spec/images/channel_ChannelDescriptor_get_sendh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.get_sendh()"; 7 | UP GS "CHANNEL_GET_SENDH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_SENDH"+ 11 | GS UP "SendChannel"; 12 | ltext@ GS "fail: send handle unavailable"; 13 | GS UP "FAIL_CHANNEL_SENDH_UNAVAIL"+ 14 | GS UP "None"; 15 | ltext@ GS "fail: no such channel"; 16 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | GS UP "None"; 18 | -------------------------------------------------------------------------------- /doc_spec/images/channel_ChannelDescriptor_refresh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "ChannelDescriptor.refresh()"; 7 | UP GS "CHANNEL_REFRESH"+ 8 | UP GS "p_uid, c_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_DESCRIPTOR"+ 11 | GS UP "ChannelDescriptor"; 12 | ltext@ GS "fail: no such channel"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc_spec/images/channel_RecvChannel_close.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "RecvChannel.close()"; 7 | UP LS "CHANNEL_CLOSE_RECV"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS 
"success"; 10 | LS UP "CHANNEL_RECV_CLOSED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already closed"; 13 | LS UP "FAIL_CHANNEL_CLOSE_RECV_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | -------------------------------------------------------------------------------- /doc_spec/images/channel_RecvChannel_open.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "RecvChannel.open()"; 7 | UP LS "CHANNEL_OPEN_RECV"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS "success"; 10 | LS UP "CHANNEL_RECV_OPENED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already opened"; 13 | LS UP "FAIL_CHANNEL_OPEN_RECV_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | 19 | -------------------------------------------------------------------------------- /doc_spec/images/channel_SendChannel_close.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "SendChannel.close()"; 7 | UP LS "CHANNEL_CLOSE_SEND"+ 8 | UP LS "p_uid, c_uid"; 9 | ltext@ LS "success"; 10 | LS UP "CHANNEL_SEND_CLOSED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already closed"; 13 | LS UP "FAIL_CHANNEL_CLOSE_SEND_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | -------------------------------------------------------------------------------- /doc_spec/images/channel_SendChannel_open.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | LS "Local Services" 5 | end; 6 | rtext@ UP "SendChannel.open()"; 7 | UP LS "CHANNEL_OPEN_SEND"+ 8 | UP LS "p_uid, c_uid"; 9 
| ltext@ LS "success"; 10 | LS UP "CHANNEL_SEND_OPENED"+ 11 | LS UP "None"; 12 | ltext@ LS "fail: already opened"; 13 | LS UP "FAIL_CHANNEL_OPEN_SEND_ALREADY"+ 14 | LS UP "None"; 15 | ltext@ LS "fail: no such channel"; 16 | LS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | LS UP "None"; 18 | 19 | -------------------------------------------------------------------------------- /doc_spec/images/channel_create.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "Channel()"; 7 | UP GS "CHANNEL_CREATE"+ 8 | UP GS "p_uid, channel props, [user_name]"; 9 | ltext@ GS "success, channel created"; 10 | ltext@ GS "new channel n_cid and names assigned"; 11 | GS UP "CHANNEL_DESCRIPTOR"+ 12 | GS UP "ChannelDescriptor"; 13 | ltext@ GS "fail: channel couldn't be made"; 14 | GS UP "FAIL_CHANNEL_CREATE"+ 15 | GS UP "error info"; 16 | ltext@ GS "fail: user name in use"; 17 | GS UP "FAIL_CHANNEL_ALREADY"+ 18 | GS UP "ChannelDescriptor"; 19 | -------------------------------------------------------------------------------- /doc_spec/images/channel_destroy.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "channel.destroy()"; 7 | UP GS "CHANNEL_DESTROY"+ 8 | UP GS "p_uid, target(c_uid| user_name)"; 9 | ltext@ GS "success: channel removed"; 10 | GS UP "CHANNEL_IS_DESTROYED"+ 11 | GS UP "None"; 12 | ltext@ GS "fail: channel is busy"; 13 | GS UP "FAIL_CHANNEL_BUSY"+ 14 | GS UP "error info"; 15 | ltext@ GS "fail: channel unknown"; 16 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 17 | GS UP "None"; 18 | -------------------------------------------------------------------------------- /doc_spec/images/channel_list.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS 
"Global Services" 5 | end; 6 | rtext@ UP "channel.list()"; 7 | UP GS "CHANNEL_LIST"+ 8 | UP GS "p_uid"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_LIST_RESP"+ 11 | GS UP "[(c_uid, name)]"; 12 | 13 | -------------------------------------------------------------------------------- /doc_spec/images/channel_query.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "channel.query()"; 7 | UP GS "CHANNEL_QUERY"+ 8 | UP GS "p_uid, target(c_uid | user_name)"; 9 | ltext@ GS "success"; 10 | GS UP "CHANNEL_DESCRIPTOR"+ 11 | GS UP "ChannelDescriptor"; 12 | ltext@ GS "fail: names not found"; 13 | GS UP "FAIL_CHANNEL_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc_spec/images/process_ProcessInfo_refresh.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.ProcessInfo.refresh()"; 7 | UP GS "PROCESS_QUERY"+ 8 | UP GS "n_uid, target(n_uid | user_name)"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO"+ 11 | GS UP "ProcessInfo"; 12 | ltext@ GS "fail: names not found"; 13 | ltext@ GS "shouldn't ever fail - process records persist"; 14 | GS UP "FAIL_PROCESS_UNKNOWN"+ 15 | GS UP "None"; 16 | -------------------------------------------------------------------------------- /doc_spec/images/process_create.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.create()"; 7 | UP GS "PROCESS_CREATE"+ 8 | UP GS "p_uid, [user_name] exe, args, env"; 9 | ltext@ GS "success, process created"; 10 | ltext@ GS "new process p_uid and names assigned"; 11 | GS UP "PROCESS_INFO"+ 12 | GS UP "ProcessInfo"; 13 | ltext@ GS 
"fail: process couldn't start"; 14 | GS UP "FAIL_PROCESS_CREATE"+ 15 | GS UP "error info"; 16 | ltext@ GS "fail: user name in use"; 17 | GS UP "FAIL_PROCESS_ALREADY"+ 18 | GS UP "ProcessInfo"; 19 | -------------------------------------------------------------------------------- /doc_spec/images/process_join.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.join()"; 7 | rtext@ UP "block: server does timeout"; 8 | UP GS "PROCESS_JOIN"+ 9 | UP GS "p_uid, target(p_uid | user_name)"; 10 | ltext@ GS "success, process ended"; 11 | GS UP "PROCESS_IS_JOINED"+ 12 | GS UP "return code"; 13 | ltext@ GS "timer expired"; 14 | GS UP "FAIL_PROCESS_JOIN_TIMEOUT"+ 15 | GS UP "None"; 16 | ltext@ GS "fail: names not found"; 17 | GS UP "FAIL_PROCESS_UNKNOWN"+ 18 | GS UP "None"; 19 | -------------------------------------------------------------------------------- /doc_spec/images/process_kill.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.kill()"; 7 | UP GS "PROCESS_KILL"+ 8 | UP GS "p_uid, target(p_uid | user_name), sig"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO"+ 11 | GS UP "ProcessInfo"; 12 | ltext@ GS "fail: names not found"; 13 | GS UP "FAIL_PROCESS_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc_spec/images/process_list.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.list()"; 7 | UP GS "PROCESS_LIST"+ 8 | UP GS "p_uid"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO_LIST"+ 11 | GS UP "[(p_uid, name)]"; 12 | 
-------------------------------------------------------------------------------- /doc_spec/images/process_query.srms: -------------------------------------------------------------------------------- 1 | 6 4 2 | begin components 3 | UP "User Process" 4 | GS "Global Services" 5 | end; 6 | rtext@ UP "process.query()"; 7 | UP GS "PROCESS_QUERY"+ 8 | UP GS "p_uid, target(p_uid | user_name)"; 9 | ltext@ GS "success"; 10 | GS UP "PROCESS_INFO"+ 11 | GS UP "ProcessInfo"; 12 | ltext@ GS "fail: names not found"; 13 | GS UP "FAIL_PROCESS_UNKNOWN"+ 14 | GS UP "None"; 15 | -------------------------------------------------------------------------------- /doc_spec/index.rst: -------------------------------------------------------------------------------- 1 | .. Dragon Architecture documentation master file, created by 2 | sphinx-quickstart on Mon May 22 22:06:10 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Dragon Architecture's documentation! 7 | =============================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | client.rst 14 | 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /dst/manylinux2014_py310.yaml: -------------------------------------------------------------------------------- 1 | - name: manylinux2014-build 2 | os: centos7 3 | arch: x86_64 4 | image: arti.hpc.amslabs.hpecorp.net/quay-remote/pypa/manylinux2014_x86_64 5 | spec_file: fake.spec 6 | pre_build: dst/runBuildPrep.py310.sh 7 | build: dst/runBuild.sh 8 | post_build: dst/runUnitTest.sh 9 | container_flags: ["--shm-size=4gb", "--network=host", "--privileged", "--rm"] 10 | -------------------------------------------------------------------------------- /dst/manylinux2014_py311.yaml: -------------------------------------------------------------------------------- 1 | - name: manylinux2014-build 2 | os: centos7 3 | arch: x86_64 4 | image: arti.hpc.amslabs.hpecorp.net/quay-remote/pypa/manylinux2014_x86_64 5 | spec_file: fake.spec 6 | pre_build: dst/runBuildPrep.py311.sh 7 | build: dst/runBuild.sh 8 | post_build: dst/runUnitTest.sh 9 | container_flags: ["--shm-size=4gb", "--network=host", "--privileged", "--rm"] 10 | -------------------------------------------------------------------------------- /dst/manylinux2014_py312.yaml: -------------------------------------------------------------------------------- 1 | - name: manylinux2014-build 2 | os: centos7 3 | arch: x86_64 4 | image: arti.hpc.amslabs.hpecorp.net/quay-remote/pypa/manylinux2014_x86_64 5 | spec_file: fake.spec 6 | pre_build: dst/runBuildPrep.py312.sh 7 | build: dst/runBuild.sh 8 | post_build: dst/runUnitTest.sh 9 | container_flags: ["--shm-size=4gb", "--network=host", "--privileged", "--rm"] 10 | -------------------------------------------------------------------------------- /dst/runBuildPrep.py310.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # install barebones PE PrgEnv-gnu in a DST container for Dragon bld 5 | # 6 | 7 | set -x 8 | 9 | pwd 10 | ls -alt 11 | lscpu 12 | unset TARGET_ARCH 13 | 14 | cat pipeline_env_vars.txt 15 | 16 | ./dst/runBuildPrep.general.sh 17 | 18 | source ~/.bashrc 19 | conda create -y -n _dev python=3.10 # For the building of dragon 20 | conda create -y -n _env python=3.10 # For testing of release package 21 | -------------------------------------------------------------------------------- /dst/runBuildPrep.py311.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # install barebones PE PrgEnv-gnu in a DST container for Dragon bld 5 | # 6 | 7 | set -x 8 | 9 | pwd 10 | ls -alt 11 | lscpu 12 | unset TARGET_ARCH 13 | 14 | cat pipeline_env_vars.txt 15 | 16 | ./dst/runBuildPrep.general.sh 17 | 18 | source ~/.bashrc 19 | conda create -y -n _dev python=3.11 # For the building of dragon 20 | conda create -y -n _env python=3.11 # For testing of release package 21 | -------------------------------------------------------------------------------- /dst/runBuildPrep.py312.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # install barebones PE PrgEnv-gnu in a DST container for Dragon bld 5 | # 6 | 7 | set -x 8 | 9 | pwd 10 | ls -alt 11 | lscpu 12 | unset TARGET_ARCH 13 | 14 | cat pipeline_env_vars.txt 15 | 16 | ./dst/runBuildPrep.general.sh 17 | 18 | source ~/.bashrc 19 | conda create -y -n _dev python=3.12 # For the building of dragon 20 | conda create -y -n _env python=3.12 # For testing of release package 21 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Dragon Examples 2 | 3 | This directory contains example 
code using the Dragon APIs. The purpose of examples here is to show how the 4 | Dragon APIs can be used directly to accomplish a variety of tasks including shared memory access, process 5 | control, and communication. These examples are in some cases motivated by multiprocessing and give an idea of 6 | how its interfaces are implemented over Dragon. 7 | 8 | There are READMEs in all directories with more details on the available examples. 9 | 10 | * `dragon_gs_client`: examples using the Dragon GS Client API directly 11 | * `dragon_native`: examples using Dragons native objects 12 | * `multiprocessing`: examples using Python Multiprocessing with the Dragon runtime. 13 | * `dragon_data`: examples using Dragon Dictionary object 14 | 15 | Refer to the API reference in the documentation for further information on the Dragon API stack. -------------------------------------------------------------------------------- /examples/dragon_core/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | INCLUDE = -I $(DRAGON_INCLUDE_DIR) 3 | LIBS = -L $(DRAGON_BASE_DIR)/lib 4 | CFLAGS = -O3 5 | INSTALL = install -D 6 | 7 | INSTALL_LIB = lib 8 | INSTALL_INCLUDE = include 9 | INSTALL_PYLIB = pylib 10 | 11 | BIN_FILES = logging ringproc 12 | 13 | BASE_DIR = $(PWD) 14 | 15 | COMPONENT_DIRS = performance 16 | 17 | %.c.o: %.c 18 | $(CC) $(INCLUDE) $(CFLAGS) -c $< -o $@ 19 | 20 | default: ringproc logging 21 | $(foreach p,$(COMPONENT_DIRS),cd $(p) && $(MAKE) dist && cd $(BASE_DIR);) 22 | 23 | ringproc: ringproc.c.o 24 | $(CC) $(INCLUDE) $(CFLAGS) -o ringproc $< $(LIBS) -ldragon 25 | 26 | logging: logging.c.o 27 | $(CC) $(INCLUDE) $(CFLAGS) -o logging $< $(LIBS) -ldragon -lrt 28 | 29 | clean: 30 | $(foreach p,$(COMPONENT_DIRS),cd $(p) && $(MAKE) clean && cd $(BASE_DIR);) 31 | rm -rf *.o $(BIN_FILES) core *.bin 32 | rm -rf /dev/shm/* 33 | -------------------------------------------------------------------------------- 
/examples/dragon_core/performance/.gitignore: -------------------------------------------------------------------------------- 1 | ch_p2p_bandwidth 2 | ch_p2p_latency 3 | ch_p2p_msg_rate 4 | -------------------------------------------------------------------------------- /examples/dragon_core/performance/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | AR = ar 3 | INCLUDE = -I $(DRAGON_INCLUDE_DIR) 4 | LIBS = -L $(DRAGON_BASE_DIR)/lib -L. 5 | CFLAGS = -O3 6 | CREQFLAGS = -fPIC 7 | INSTALL = install -D 8 | 9 | INSTALL_LIB = lib 10 | INSTALL_INCLUDE = include 11 | INSTALL_PYLIB = pylib 12 | 13 | BIN_FILES = ch_p2p_latency ch_p2p_bandwidth ch_p2p_msg_rate 14 | 15 | %.c.o: %.c 16 | $(CC) $(CFLAGS) $(CREQFLAGS) $(INCLUDE) -c $< -o $@ 17 | 18 | %.a: %.c.o 19 | $(AR) cr lib$@ $< 20 | 21 | dist: default 22 | 23 | default: $(BIN_FILES) 24 | 25 | debug: CFLAGS += -DDEBUG -g 26 | debug: default 27 | 28 | ch_p2p_latency: ch_p2p_latency.c.o ch_p2p_common.a 29 | $(CC) $(INCLUDE) $(CFLAGS) -o ch_p2p_latency $< $(LIBS) -ldragon -lrt -lch_p2p_common 30 | 31 | ch_p2p_bandwidth: ch_p2p_bandwidth.c.o ch_p2p_common.a 32 | $(CC) $(INCLUDE) $(CFLAGS) -o ch_p2p_bandwidth $< $(LIBS) -ldragon -lrt -lch_p2p_common 33 | 34 | ch_p2p_msg_rate: ch_p2p_msg_rate.c.o ch_p2p_common.a 35 | $(CC) $(INCLUDE) $(CFLAGS) -o ch_p2p_msg_rate $< $(LIBS) -ldragon -lrt -lch_p2p_common 36 | 37 | clean: 38 | rm -rf *.o *.a $(BIN_FILES) 39 | -------------------------------------------------------------------------------- /examples/dragon_core/performance/run_ch_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export DRAGON_TRANSPORT_TEST=1 4 | export DRAGON_TRANSPORT_AGENT=tcp 5 | 6 | cd ../../../ && . 
hack/multinode_config 7 | cd - 8 | 9 | # Remove any cores lying around from the CTI destructor that does 10 | # a double free on an exit from main 11 | rm core 12 | 13 | # Can set up to 3 for increasing verbosity 14 | export MRNET_DEBUG_LEVEL=0 15 | export DRAGON_DEBUG=0 16 | # Setting CTI_DEBUG will turn on logging 17 | export CTI_DEBUG=0 18 | 19 | # Tell CTI where to log to 20 | mkdir -p log 21 | export CTI_LOG_DIR=$PWD/log 22 | 23 | rm -f log/* 24 | rm -f *.log 25 | 26 | # dragon_multi is a sym link to src/dragon/dragon_multi_fe.py 27 | dragon_multi ./ch_p2p_benchmark.py 28 | 29 | -------------------------------------------------------------------------------- /examples/dragon_data/ddict/.gitignore: -------------------------------------------------------------------------------- 1 | ddict_pi_sim_train 2 | ddict_pi_sim_aggregate -------------------------------------------------------------------------------- /examples/dragon_data/requirements.txt: -------------------------------------------------------------------------------- 1 | zarr==2.18.4 2 | -------------------------------------------------------------------------------- /examples/dragon_gs_client/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | 3 | CFLAGS = -g -pedantic -Wall -I ${CRAY_MPICH_DIR}/include -L ${CRAY_MPICH_DIR}/lib 4 | LD_FLAGS = -lm -L ${CRAY_MPICH_DIR}/lib -lmpich 5 | H_SOURCES = 6 | 7 | MPI_EXE=mpi_hello 8 | MPI_SRC=mpi_hello.c 9 | MPI_OBJECT = $(MPI_SRC:.c=.c.o) 10 | 11 | default: $(MPI_EXE) 12 | 13 | %.c.o: %.c $(H_SOURCES) 14 | $(CC) $(CFLAGS) -c $< -o $@ 15 | 16 | $(MPI_EXE): $(MPI_OBJECT) 17 | $(CC) $(LD_FLAGS) $^ -o $@ 18 | 19 | clean: 20 | $(RM) $(MPI_EXE) $(MPI_OBJECT) 21 | -------------------------------------------------------------------------------- /examples/dragon_gs_client/mpi_hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 
| 6 | 7 | int main(int argc, char** argv) { 8 | 9 | // Initialize the MPI environment 10 | MPI_Init(NULL, NULL); 11 | 12 | // Get the number of processes 13 | int world_size; 14 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); 15 | 16 | // Get the rank of the process 17 | int world_rank; 18 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); 19 | 20 | // Get the name of the processor 21 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 22 | int name_len; 23 | MPI_Get_processor_name(processor_name, &name_len); 24 | 25 | // Print off a hello world message 26 | printf("Hello world from pid %d, processor %s, rank %d out of %d processors\n", 27 | getpid(), processor_name, world_rank, world_size); 28 | 29 | // Finalize the MPI environment. 30 | MPI_Finalize(); 31 | 32 | } 33 | -------------------------------------------------------------------------------- /examples/dragon_gs_client/requests.txt: -------------------------------------------------------------------------------- 1 | apples 2 | bananas 3 | rodents 4 | stop -------------------------------------------------------------------------------- /examples/dragon_native/mpi/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | 3 | CFLAGS = -g -pedantic -Wall -I ${CRAY_MPICH_DIR}/include -L ${CRAY_MPICH_DIR}/lib 4 | LD_FLAGS = -lm -L ${CRAY_MPICH_DIR}/lib -lmpich 5 | H_SOURCES = 6 | 7 | MPI_EXE=mpi_hello 8 | MPI_SRC=mpi_hello.c 9 | MPI_OBJECT = $(MPI_SRC:.c=.c.o) 10 | 11 | OSU_ALLTOALL_EXE=osu_alltoall 12 | 13 | .PHONY: all 14 | 15 | all: $(MPI_EXE) $(OSU_ALLTOALL_EXE) 16 | default: all 17 | 18 | $(OSU_ALLTOALL_EXE): 19 | $(MAKE) -C alltoall 20 | 21 | %.c.o: %.c $(H_SOURCES) 22 | $(CC) $(CFLAGS) -c $< -o $@ 23 | 24 | $(MPI_EXE): $(MPI_OBJECT) 25 | $(CC) $(LD_FLAGS) $^ -o $@ 26 | 27 | clean: 28 | $(RM) $(MPI_EXE) $(MPI_OBJECT) 29 | $(MAKE) -C alltoall clean 30 | -------------------------------------------------------------------------------- 
/examples/dragon_native/mpi/alltoall/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | 3 | CFLAGS = -g -I. -I ${CRAY_MPICH_DIR}/include 4 | LD_FLAGS = -lm -L ${CRAY_MPICH_DIR}/lib -lmpich 5 | 6 | OSU_SRC = osu_alltoall.c \ 7 | osu_util.c \ 8 | osu_util_graph.c \ 9 | osu_util_mpi.c \ 10 | osu_util_papi.c \ 11 | osu_util_validation.c 12 | 13 | OSU_OBJ = $(OSU_SRC:.c=.c.o) 14 | OSU_EXE = ../osu_alltoall 15 | 16 | 17 | %.c.o: %.c 18 | $(CC) $(CFLAGS) -c $< -o $@ 19 | 20 | $(OSU_EXE): $(OSU_OBJ) 21 | $(CC) $(LD_FLAGS) $^ -o $@ 22 | 23 | clean: 24 | rm *.o $(OSU_EXE) -------------------------------------------------------------------------------- /examples/dragon_native/mpi/alltoall/osu_util_papi.h: -------------------------------------------------------------------------------- 1 | /* 2 | *Copyright (c) 2002-2024 the Network-Based Computing Laboratory 3 | *(NBCL), The Ohio State University. 4 | * 5 | *Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | *For detailed copyright and licensing information, please refer to the 8 | *copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | 11 | /* 12 | * PAPI uses -1 as a nonexistent hardware event placeholder 13 | * https://icl.utk.edu/papi/docs/df/d34/group__consts.html 14 | */ 15 | #ifdef _ENABLE_PAPI_ 16 | #define OMB_PAPI_NULL PAPI_NULL 17 | #else 18 | #define OMB_PAPI_NULL -1 19 | #endif 20 | 21 | #define OMB_PAPI_FILE_PATH_MAX_LENGTH OMB_FILE_PATH_MAX_LENGTH 22 | #define OMB_PAPI_NUMBER_OF_EVENTS 100 23 | 24 | void omb_papi_init(int *papi_eventset); 25 | void omb_papi_start(int *papi_eventset); 26 | void omb_papi_stop_and_print(int *papi_eventset, int size); 27 | void omb_papi_free(int *papi_eventset); 28 | void omb_papi_parse_event_options(char *opt_arr); 29 | -------------------------------------------------------------------------------- /examples/dragon_native/mpi/mpi_hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char** argv) { 8 | 9 | // Initialize the MPI environment 10 | MPI_Init(NULL, NULL); 11 | 12 | // Get the number of processes 13 | int world_size; 14 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); 15 | 16 | // Get the rank of the process 17 | int world_rank; 18 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); 19 | 20 | // Get the name of the processor 21 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 22 | int name_len; 23 | MPI_Get_processor_name(processor_name, &name_len); 24 | 25 | // Print off a hello world message 26 | printf("Hello world from pid %d, processor %s, rank %d out of %d processors\n", 27 | getpid(), processor_name, world_rank, world_size); 28 | 29 | // Finalize the MPI environment. 30 | MPI_Finalize(); 31 | 32 | } 33 | -------------------------------------------------------------------------------- /examples/dragon_native/mpi/mpi_process_group_demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run a sample MPI Hello World application with 1 rank per allocated node. 
3 | """ 4 | 5 | import os 6 | 7 | from dragon.native.process import ProcessTemplate 8 | from dragon.native.process_group import ProcessGroup 9 | from dragon.globalservices.node import get_list 10 | 11 | 12 | def main(): 13 | nnodes = len(get_list()) 14 | mpi_hello_cmd = os.path.join(os.getcwd(), "mpi_hello") 15 | args = [] 16 | cwd = os.getcwd() 17 | 18 | if not nnodes: 19 | print("No slurm allocation detected.") 20 | os.exit(-1) 21 | 22 | pool = ProcessGroup(restart=False, pmi_enabled=True) 23 | pool.add_process( 24 | nproc=nnodes, 25 | template=ProcessTemplate( 26 | target=mpi_hello_cmd, args=args, cwd=cwd, env=None 27 | ), 28 | ) 29 | pool.init() 30 | pool.start() 31 | pool.join() 32 | pool.close() 33 | 34 | return 0 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /examples/dragon_telemetry/telemetry.yaml: -------------------------------------------------------------------------------- 1 | # Configure Telemetry env variables here 2 | # Aggregator 3 | aggregator_port: 4242 4 | # TSDB Server - must be different from aggregator_port 5 | tsdb_server_port: 4243 6 | # Collector 7 | collector_rate: 0.5 8 | default_tmdb_dir: "/tmp" 9 | delete_tmdb: 1 10 | default_tmdb_window: 300 11 | # SSH Tunnel 12 | # remote_tunnel_node: 13 | # remote_tunnel_port: 14 | -------------------------------------------------------------------------------- /examples/dragon_workflows/run_client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dragon -s lazy_attach.py ${1} 4 | -------------------------------------------------------------------------------- /examples/dragon_workflows/run_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #dragon-cleanup > /dev/null 2>&1 4 | ../../src/bin/dragon-cleanup > /dev/null 2>&1 5 | rm ./*.log core > /dev/null 2>&1 6 | rm 
~/.dragon/my-runtime > /dev/null 2>&1 7 | cc -o mpi_hello mpi_hello.c 8 | dragon server.py 9 | rm mpi_hello 10 | -------------------------------------------------------------------------------- /examples/dragon_workflows/server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import dragon.infrastructure.parameters as dparm 4 | import dragon.workflows.runtime as runtime 5 | 6 | 7 | def wait_for_exit(): 8 | path = '/home/users/nradclif/hpc-pe-dragon-dragon/examples/dragon_workflows/client_exit' 9 | while not os.path.exists(path): 10 | time.sleep(1) 11 | time.sleep(1) 12 | if dparm.this_process.index == 0: 13 | os.remove(path) 14 | 15 | 16 | sdesc = runtime.publish('my-runtime') 17 | print(f'Runtime serialized descriptor: {sdesc}', flush=True) 18 | wait_for_exit() 19 | -------------------------------------------------------------------------------- /examples/jupyter/example.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | def hello(arg=None): 4 | print(f'Hello DRAGON from {socket.gethostname()}!!', flush=True) 5 | 6 | 7 | if __name__ == "__main__": 8 | hello() 9 | -------------------------------------------------------------------------------- /examples/jupyter/getout.py: -------------------------------------------------------------------------------- 1 | import dragon 2 | import multiprocessing as mp 3 | import dragon.globalservices.process as gproc 4 | import dragon.infrastructure.util as dutil 5 | import example as ex 6 | 7 | def main(): 8 | mp.set_start_method('dragon') 9 | dutil.enable_logging() 10 | gproc.start_capturing_child_mp_output() 11 | 12 | proc = mp.Process(target=ex.hello, args=()) 13 | proc.start() 14 | proc.join() 15 | gproc.stop_capturing_child_mp_output() 16 | 17 | if __name__ == "__main__": 18 | main() 19 | 20 | -------------------------------------------------------------------------------- 
/examples/jupyter/imageproc.py: -------------------------------------------------------------------------------- 1 | import scipy.signal 2 | import dragon 3 | import os 4 | import socket 5 | import multiprocessing as mp 6 | 7 | def hello(arg=None): 8 | print(f'Hello DRAGON from {socket.gethostname()} with {mp.cpu_count()} cores!!', flush=True) 9 | 10 | def f(args): 11 | image, random_filter = args 12 | # Do some image processing. 13 | return scipy.signal.convolve2d(image, random_filter)[::5, ::5] 14 | 15 | if __name__ == "__main__": 16 | hello() -------------------------------------------------------------------------------- /examples/multiprocessing/perf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import dragon 4 | import multiprocessing as mp 5 | import dragon.perf as dperf 6 | 7 | if __name__ == "__main__": 8 | num_procs = 8 9 | with dperf.Session(num_procs) as session: 10 | kernel = session.new_kernel('small msg all-to-all') 11 | 12 | small_msg_size = 64 13 | large_msg_size = 1024 * 1024 14 | timeout_in_sec = 999 15 | 16 | # create an all-to-all kernel 17 | 18 | for src_ch_idx in range(num_procs): 19 | for dst_ch_idx in range(num_procs): 20 | kernel.append(dperf.Opcode.SEND_MSG, src_ch_idx, dst_ch_idx, small_msg_size, timeout_in_sec) 21 | 22 | for src_ch_idx in range(num_procs): 23 | for _ in range(num_procs): 24 | kernel.append(dperf.Opcode.GET_MSG, src_ch_idx, src_ch_idx, timeout_in_sec) 25 | 26 | # run the kernel 27 | 28 | kernel.run() 29 | 30 | -------------------------------------------------------------------------------- /examples/multiprocessing/unittests/.gitignore: -------------------------------------------------------------------------------- 1 | # Github boilerplate .gitignore 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | bin/ 13 | build/ 14 | 
develop-eggs/ 15 | dist/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Installer logs 27 | pip-log.txt 28 | pip-delete-this-directory.txt 29 | 30 | # Unit test / coverage reports 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | -------------------------------------------------------------------------------- /examples/multiprocessing/unittests/mp_fork_bomb.py: -------------------------------------------------------------------------------- 1 | import dragon 2 | import multiprocessing, sys 3 | 4 | 5 | def foo(): 6 | print("123") 7 | 8 | 9 | # Because "if __name__ == '__main__'" is missing this will not work 10 | # correctly on Windows. However, we should get a RuntimeError rather 11 | # than the Windows equivalent of a fork bomb. 
12 | 13 | if len(sys.argv) > 1: 14 | multiprocessing.set_start_method(sys.argv[1]) 15 | else: 16 | multiprocessing.set_start_method("spawn") 17 | 18 | p = multiprocessing.Process(target=foo) 19 | p.start() 20 | p.join() 21 | sys.exit(p.exitcode) 22 | -------------------------------------------------------------------------------- /examples/multiprocessing/unittests/orig/_test_multiprocessing_spawn.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import test._test_multiprocessing 3 | 4 | from test import support 5 | 6 | if support.PGO: 7 | raise unittest.SkipTest("test is not helpful for PGO") 8 | 9 | test._test_multiprocessing.install_tests_in_module_dict(globals(), 'spawn') 10 | 11 | if __name__ == '__main__': 12 | unittest.main() 13 | -------------------------------------------------------------------------------- /examples/smartsim/client_logging/Makefile: -------------------------------------------------------------------------------- 1 | rand: 2 | g++ rand.cpp -o rand.exe 3 | 4 | error_rand: 5 | g++ rand_failure.cpp -o rand_failure.exe 6 | 7 | all: rand error_rand 8 | 9 | clean: 10 | rm *.exe 11 | -------------------------------------------------------------------------------- /examples/smartsim/client_logging/rand.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char* argv[]) { 7 | std::srand(std::time(nullptr)); 8 | double random_num = std::rand() / static_cast(RAND_MAX) * 10.0; 9 | std::cout< 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char* argv[]) { 8 | if (argc != 2) { 9 | return 1; 10 | } 11 | 12 | int sleep_time = std::atoi(argv[1]); 13 | 14 | std::srand(std::time(nullptr)); // seed the random number generator 15 | 16 | std::cout << "Sleeping for " << sleep_time << " seconds..." 
<< std::endl; 17 | sleep(sleep_time); 18 | 19 | double random_num = std::rand() / static_cast(RAND_MAX) * 10.0; 20 | std::cout< 5 | #include 6 | 7 | #define DRAGON_QUEUE_UMAP_SEED 12 8 | 9 | typedef struct dragonQueue_st { 10 | dragonMemoryPoolDescr_t pool; 11 | dragonChannelDescr_t ch; 12 | dragonChannelSendh_t csend; 13 | dragonChannelRecvh_t crecv; 14 | dragonQ_UID_t q_uid; 15 | } dragonQueue_t; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/lib/_shared_lock.h: -------------------------------------------------------------------------------- 1 | #ifndef HAVE_DRAGON_LOCK_INTERNAL_H 2 | #define HAVE_DRAGON_LOCK_INTERNAL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "shared_lock.h" 9 | 10 | #ifdef __cplusplus 11 | #include 12 | using namespace std; 13 | #else 14 | #include 15 | #endif 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #define DRAGON_LOCK_POLL_PRE_SLEEP_ITERS 10000000UL 22 | #define DRAGON_LOCK_POLL_SLEEP_USEC 2 23 | #define DRAGON_LOCK_MEM_ORDER memory_order_acq_rel 24 | #define DRAGON_LOCK_MEM_ORDER_FAIL memory_order_relaxed 25 | #define DRAGON_LOCK_MEM_ORDER_READ memory_order_acquire 26 | #define DRAGON_LOCK_CL_PADDING 64 27 | #define DRAGON_LOCK_NODE_FANOUT 16 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif -------------------------------------------------------------------------------- /src/lib/hostid.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | dragonULInt 9 | dragon_host_id(); 10 | 11 | dragonError_t 12 | dragon_set_host_id(dragonULInt id); 13 | 14 | dragonULInt 15 | dragon_host_id_from_k8s_uuid(char *pod_uid); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif -------------------------------------------------------------------------------- /src/lib/ulist.h: 
-------------------------------------------------------------------------------- 1 | #ifndef HAVE_DRAGON_LIST_H 2 | #define HAVE_DRAGON_LIST_H 3 | 4 | #include 5 | #include 6 | #include "shared_lock.h" 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | typedef struct dragonList_st { 14 | dragonLock_t _dlock; 15 | void * _lmem; 16 | void * _list; 17 | } dragonList_t; 18 | 19 | dragonError_t 20 | dragon_ulist_create(dragonList_t **dlist_in); 21 | 22 | dragonError_t 23 | dragon_ulist_destroy(dragonList_t **dlist_in); 24 | 25 | dragonError_t 26 | dragon_ulist_additem(dragonList_t **dlist_in, const void *item); 27 | 28 | dragonError_t 29 | dragon_ulist_delitem(dragonList_t **dlist_in, const void *item); 30 | 31 | dragonError_t 32 | dragon_ulist_get_current_advance(dragonList_t **dlist_in, void **item); 33 | 34 | dragonError_t 35 | dragon_ulist_get_by_idx(dragonList_t **dlist_in, int idx, void **item); 36 | 37 | bool 38 | dragon_ulist_contains(dragonList_t **dlist_in, const void *item); 39 | 40 | size_t 41 | dragon_ulist_get_size(dragonList_t **dlist_in); 42 | 43 | #ifdef __cplusplus 44 | } 45 | #endif 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /src/pkg/LICENSE.md: -------------------------------------------------------------------------------- 1 | Dragon License: 2 | --------------- 3 | 4 | Copyright 2023-2025 Hewlett Packard Enterprise Development LP 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or 
substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | -------------------------------------------------------------------------------- /src/pkg/Makefile: -------------------------------------------------------------------------------- 1 | INSTALL ?= install 2 | INSTALL_DIR ?= ../dist/ 3 | INSTALL_MODE ?= 755 4 | 5 | PRODUCTS = INSTALL.md \ 6 | LICENSE.md \ 7 | README.md \ 8 | RELEASE_NOTES.md \ 9 | CHANGELOG.md 10 | 11 | 12 | build: 13 | @echo "Nothing to do for now" 14 | 15 | dist: 16 | $(foreach p,$(PRODUCTS),$(INSTALL) -m $(INSTALL_MODE) -D $(p) $(INSTALL_DIR)/$(p);) 17 | 18 | clean: 19 | @echo "Nothing to do for now" 20 | -------------------------------------------------------------------------------- /src/pyproject.toml: -------------------------------------------------------------------------------- 1 | ../pyproject.toml -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | ../.devcontainer/requirements.txt -------------------------------------------------------------------------------- /src/requirements_examples.txt: -------------------------------------------------------------------------------- 1 | jupyter>=1.1.1 2 | scipy>=1.13.1 3 | six>=1.16.0 4 | vacuum>=0.3.1 5 | zarr<3.0.0 -------------------------------------------------------------------------------- /src/tools/dragon-flame-graph: -------------------------------------------------------------------------------- 
1 | #!/bin/bash 2 | 3 | save_cwd=$(pwd) 4 | fg_dir=${1} 5 | 6 | cp perf.data ${fg_dir} 7 | cd ${fg_dir} 8 | 9 | perf script | ./stackcollapse-perf.pl > out.perf-folded 10 | ./flamegraph.pl out.perf-folded > ${save_cwd}/perf.svg 11 | 12 | rm perf.data 13 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.log 3 | *.o 4 | *.dat 5 | dump_file 6 | native/c_ddict 7 | native/cpp_ddict 8 | multi-node/zarr_data 9 | -------------------------------------------------------------------------------- /test/MANIFEST: -------------------------------------------------------------------------------- 1 | 2 | 3 | setup.sh 4 | (placeholder) shell script to source to set up paths to run tests. 5 | 6 | ./support/ 7 | Some support utilities for testing 8 | 9 | ./mp_bench/ 10 | Initial approach at benchmarking standard multiprocessing 11 | 12 | ./globalservices/ 13 | Tests for global services 14 | 15 | ./shepherd/ 16 | Tests for shepherd -------------------------------------------------------------------------------- /test/broadcast/.gitignore: -------------------------------------------------------------------------------- 1 | test_bcast 2 | perf_bcast 3 | -------------------------------------------------------------------------------- /test/broadcast/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS ?= -fPIC -Wall -Ofast -fomit-frame-pointer 3 | #CFLAGS ?= -g -fPIC -Wall -O0 4 | 5 | ifeq ($(DRAGON_INCLUDE_DIR),) 6 | DRAGON_INCLUDE = $(shell dragon-config -o) 7 | else 8 | DRAGON_INCLUDE = -I $(DRAGON_INCLUDE_DIR) 9 | endif 10 | 11 | ifeq ($(DRAGON_LIB_DIR),) 12 | DRAGON_LINK = $(shell dragon-config -l) 13 | else 14 | DRAGON_LINK = -L $(shell echo ${DRAGON_LIB_DIR}) -ldragon 15 | endif 16 | 17 | INCLUDE = $(DRAGON_INCLUDE) 18 | LIBS = $(DRAGON_LINK) 19 | 20 | BIN_FILES = 
test_bcast perf_bcast 21 | 22 | %.c.o: %.c 23 | $(CC) $(INCLUDE) $(CFLAGS) -c $< -o $@ 24 | 25 | default: build 26 | 27 | build: test_bcast perf_bcast 28 | 29 | test_bcast: test_bcast.c.o 30 | $(CC) $(INCLUDE) $(CFLAGS) -o test_bcast $< $(LIBS) -lrt -pthread -ldl 31 | 32 | perf_bcast: perf_bcast.c.o 33 | $(CC) $(INCLUDE) $(CFLAGS) -o perf_bcast $< $(LIBS) -lrt -pthread -ldl 34 | 35 | clean: 36 | rm -rf *.o $(BIN_FILES) 37 | 38 | -------------------------------------------------------------------------------- /test/ccfutures/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/test/ccfutures/test/__init__.py -------------------------------------------------------------------------------- /test/ccfutures/test/support/logging_helper.py: -------------------------------------------------------------------------------- 1 | import logging.handlers 2 | 3 | 4 | class TestHandler(logging.handlers.BufferingHandler): 5 | def __init__(self, matcher): 6 | # BufferingHandler takes a "capacity" argument 7 | # so as to know when to flush. As we're overriding 8 | # shouldFlush anyway, we can set a capacity of zero. 9 | # You can call flush() manually to clear out the 10 | # buffer. 11 | logging.handlers.BufferingHandler.__init__(self, 0) 12 | self.matcher = matcher 13 | 14 | def shouldFlush(self): 15 | return False 16 | 17 | def emit(self, record): 18 | self.format(record) 19 | self.buffer.append(record.__dict__) 20 | 21 | def matches(self, **kwargs): 22 | """ 23 | Look for a saved dict whose keys/values match the supplied arguments.
24 | """ 25 | result = False 26 | for d in self.buffer: 27 | if self.matcher.matches(d, **kwargs): 28 | result = True 29 | break 30 | return result 31 | -------------------------------------------------------------------------------- /test/ccfutures/test/support/refleak_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for changing test behaviour while hunting 3 | for refleaks 4 | """ 5 | 6 | _hunting_for_refleaks = False 7 | 8 | 9 | def hunting_for_refleaks(): 10 | return _hunting_for_refleaks 11 | -------------------------------------------------------------------------------- /test/channels_subtests/.gitignore: -------------------------------------------------------------------------------- 1 | test_bch 2 | test_ch 3 | test_gateway_messages 4 | test_poll 5 | test_send 6 | test_peek_pop 7 | test_wrong 8 | fch_test 9 | test_channelsets 10 | test_gateways 11 | test_fli 12 | perf_fch 13 | test_capnp 14 | 15 | -------------------------------------------------------------------------------- /test/channels_subtests/ch1.py: -------------------------------------------------------------------------------- 1 | from dragon.channels import Channel, Message, ChannelSendH, ChannelRecvH 2 | 3 | # create a Channel 4 | ch = Channel(1) 5 | 6 | # serialize the Channel and write that to a file 7 | ser = ch.serialize() 8 | ser_file = open("channel.dat", "wb") 9 | ser_file.write(ser) 10 | ser_file.close() 11 | 12 | # create and open a send handle to the Channel 13 | sh = ch.sendh() 14 | sh.open() 15 | 16 | # create a message we will keep sending over and over again 17 | sm = Message().create_alloc(512) 18 | mb = sm.bytes_memview() 19 | mb[1] = b"K" 20 | mb[511] = b"b" 21 | 22 | # run a loop where we will keep sending the message 23 | for i in range(100): 24 | sh.send(sm) 25 | 26 | # destroy the message and by default the underlying managed memory allocation 27 | sm.destroy() 28 | 29 | # wait
for user input 30 | input("Enter to continue: ") 31 | 32 | # cleanup the channel 33 | ch.destroy() 34 | -------------------------------------------------------------------------------- /test/channels_subtests/ch2.py: -------------------------------------------------------------------------------- 1 | from dragon.channels import Channel, Message, ChannelSendH, ChannelRecvH 2 | 3 | # read the serialized channel from a file 4 | ser_file = open("channel.dat", "rb") 5 | ser = ser_file.read() 6 | ser_file.close() 7 | 8 | # attach to a Channel 9 | ch = Channel.attach(ser) 10 | 11 | # create and open a recv handle to Channel 12 | rh = ch.recvh() 13 | rh.open() 14 | 15 | # run a loop where we will keep receiving the message 16 | for i in range(100): 17 | msg = rh.recv() 18 | rb = msg.bytes_memview() 19 | print(f"Value at 1 is {rb[1]}") 20 | msg.destroy() 21 | 22 | # wait for user input 23 | input("Enter to continue: ") 24 | -------------------------------------------------------------------------------- /test/channels_subtests/test_capnp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | // ./test_capnp > test.out 7 | // capnp decode --packed ../../src/lib/message_defs.capnp DragonMessage < test.out 8 | // Running this should print: 9 | // ( header = (tc = 6, tag = 4, ref = 1, err = 0), 10 | // registerClient = (test = "Hello World!") ) 11 | 12 | int main(int argc, char* argv[]) { 13 | capnp::MallocMessageBuilder message; 14 | 15 | MessageDef::Builder msg = message.initRoot(); 16 | msg.setTc(6); 17 | msg.setTag(4); 18 | DDRegisterClientDef::Builder rc = msg.initDdRegisterClient(); 19 | rc.setRespFLI("Hello World!"); 20 | capnp::writePackedMessageToFd(1, message); 21 | return 0; 22 | } -------------------------------------------------------------------------------- /test/channels_subtests/test_capnp.py: 
-------------------------------------------------------------------------------- 1 | import capnp 2 | import dragon.infrastructure.message_defs_capnp as schema 3 | 4 | 5 | def main(): 6 | f = open("test.out", "rb") 7 | msg = schema.MessageDef.read_packed(f) 8 | print(msg.which()) 9 | print(msg.to_dict()) 10 | 11 | 12 | if __name__ == "__main__": 13 | main() 14 | -------------------------------------------------------------------------------- /test/connection/standalone_conn_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | import multiprocessing 5 | 6 | import dragon.infrastructure.standalone_conn 7 | 8 | 9 | def write_actor(writer): 10 | writer.send("hyenas") 11 | 12 | 13 | def read_actor(reader): 14 | msg = reader.recv() 15 | if msg == "hyenas": 16 | print("got hyenas") 17 | else: 18 | print("got {}, no hyenas".format(msg)) 19 | 20 | 21 | def main(): 22 | reader, writer = dragon.infrastructure.standalone_conn.Pipe(duplex=False) 23 | # reader, writer = multiprocessing.Pipe(duplex=False) 24 | write_proc = multiprocessing.Process(target=write_actor, args=(writer,)) 25 | read_proc = multiprocessing.Process(target=read_actor, args=(reader,)) 26 | 27 | write_proc.start() 28 | read_proc.start() 29 | 30 | write_proc.join() 31 | read_proc.join() 32 | 33 | 34 | if __name__ == "__main__": 35 | multiprocessing.set_start_method("spawn") 36 | main() 37 | -------------------------------------------------------------------------------- /test/debug/tester.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import pdb 4 | import sys 5 | 6 | import dragon.channels as dch 7 | import dragon.managed_memory as dmm 8 | 9 | import dragon.infrastructure.parameters as dp 10 | import dragon.infrastructure.facts as dfacts 11 | import dragon.infrastructure.debug_support as dds 12 | import dragon.utils as du 13 | 14 | 15 | def 
make_fake_inf_pool(): 16 | pool = dmm.MemoryPool.create(2**30, "mtp", dfacts.infrastructure_pool_muid_from_index(0)) 17 | dp.this_process.inf_pd = du.B64.bytes_to_str(pool.serialize()) 18 | be_ch = dch.Channel(mem_pool=pool, c_uid=dfacts.launcher_cuid_from_index(0)) 19 | dp.this_process.local_be_cd = du.B64.bytes_to_str(be_ch.serialize()) 20 | 21 | 22 | def main(): 23 | print("start") 24 | x = 17 25 | sys.breakpointhook = dds.dragon_debug_hook 26 | dds._TESTING_DEBUG_HOOK = True 27 | make_fake_inf_pool() 28 | 29 | print("breakpoint the first") 30 | breakpoint() 31 | 32 | y = 27 33 | 34 | print("breakpoint the second") 35 | breakpoint() 36 | 37 | print("done") 38 | return 0 39 | 40 | 41 | if __name__ == "__main__": 42 | exit(main()) 43 | -------------------------------------------------------------------------------- /test/globalservices/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DragonHPC/dragon/cc005015e725eff74964b75a5a5165334b03dc50/test/globalservices/__init__.py -------------------------------------------------------------------------------- /test/globalservices/jumbo_arg_test_target.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This is a target program for a managed process test 4 | # of large argument delivery. 5 | # it returns with 0 if everything is good 6 | # and with 1 if not, and creates its own logfile too. 7 | 8 | import sys 9 | 10 | import dragon.globalservices.api_setup as dapi 11 | import dragon.infrastructure.parameters as dparms 12 | import dragon.dlogging.util as dlog 13 | import logging 14 | 15 | 16 | def show_desc(desc): 17 | return f"len {len(desc)}: {desc[:4]} ... 
{desc[-4:]}" 18 | 19 | 20 | dlog.setup_logging(basename=f"jatt_id{dparms.this_process.my_puid}".format(), level=logging.DEBUG) 21 | log = logging.getLogger("") 22 | log.info("hello from jumbo arg test") 23 | 24 | expected_size = int(sys.argv[1]) 25 | log.info(f"expecting {expected_size} bytes") 26 | 27 | dapi.connect_to_infrastructure() 28 | 29 | if len(dapi._ARG_PAYLOAD) == expected_size: 30 | log.info("payload arrived ok") 31 | else: 32 | log.error(f"payload was {len(dapi._ARG_PAYLOAD)} bytes") 33 | exit(1) 34 | 35 | log.info("test finished successfully") 36 | exit(0) 37 | -------------------------------------------------------------------------------- /test/hashtable/.gitignore: -------------------------------------------------------------------------------- 1 | test_hashtable 2 | -------------------------------------------------------------------------------- /test/hashtable/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS ?= -fPIC -Wall -Ofast -fomit-frame-pointer 3 | 4 | ifeq ($(DRAGON_INCLUDE_DIR),) 5 | DRAGON_INCLUDE = $(shell dragon-config -o) 6 | else 7 | DRAGON_INCLUDE = -I $(DRAGON_INCLUDE_DIR) 8 | endif 9 | 10 | ifeq ($(DRAGON_LIB_DIR),) 11 | DRAGON_LINK = $(shell dragon-config -l) 12 | else 13 | DRAGON_LINK = -L $(shell echo ${DRAGON_LIB_DIR}) -ldragon 14 | endif 15 | 16 | INCLUDE = $(DRAGON_INCLUDE) 17 | LIBS = $(DRAGON_LINK) 18 | 19 | BIN_FILES = test_hashtable 20 | 21 | %.c.o: %.c 22 | $(CC) $(INCLUDE) $(CFLAGS) -c $< -o $@ 23 | 24 | default: build 25 | 26 | build: test_hashtable 27 | 28 | test_hashtable: test_hashtable.c.o 29 | $(CC) $(INCLUDE) $(CFLAGS) -o test_hashtable $< $(LIBS) -ldl 30 | 31 | clean: 32 | rm -rf *.o $(BIN_FILES) 33 | -------------------------------------------------------------------------------- /test/heapmanager/.gitignore: -------------------------------------------------------------------------------- 1 | test_heapmanager 2 | 
-------------------------------------------------------------------------------- /test/heapmanager/Makefile: -------------------------------------------------------------------------------- 1 | CC ?= gcc 2 | CFLAGS ?= -fPIC -Wall -Ofast -fomit-frame-pointer 3 | INCLUDE = -I $(DRAGON_INCLUDE_DIR) 4 | LIBS = -L $(DRAGON_LIB_DIR) 5 | 6 | BIN_FILES = test_heapmanager simple 7 | 8 | %.c.o: %.c 9 | $(CC) $(INCLUDE) $(CFLAGS) -c $< -o $@ 10 | 11 | default: build 12 | 13 | build: test_heapmanager 14 | 15 | test_heapmanager: test_heapmanager.c.o 16 | $(CC) $(INCLUDE) $(CFLAGS) -o test_heapmanager $< $(LIBS) -ldragon -ldl 17 | 18 | clean: 19 | rm -rf *.o $(BIN_FILES) 20 | -------------------------------------------------------------------------------- /test/infrastructure/MANIFEST: -------------------------------------------------------------------------------- 1 | Tests for internal infrastructure objects 2 | -------------------------------------------------------------------------------- /test/integration/slow_echo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo $2 3 | sleep $1 4 | -------------------------------------------------------------------------------- /test/launcher/bad_hostfile.txt: -------------------------------------------------------------------------------- 1 | host_1\thost_2 -------------------------------------------------------------------------------- /test/launcher/good_hostfile.txt: -------------------------------------------------------------------------------- 1 | host-1 2 | host-2 -------------------------------------------------------------------------------- /test/launcher/launcher_testing_utils.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from functools import wraps 3 | 4 | 5 | def catch_thread_exceptions(func): 6 | @wraps(func) 7 | def wrapper(*args, **kwargs): 8 | exceptions_caught_in_threads = {} 9 | 10 | def 
custom_excepthook(args): 11 | thread_name = args.thread.name 12 | exceptions_caught_in_threads[thread_name] = { 13 | "thread": args.thread, 14 | "exception": {"type": args.exc_type, "value": args.exc_value, "traceback": args.exc_traceback}, 15 | } 16 | 17 | # Registering our custom excepthook to catch the exception in the threads 18 | old_excepthook = threading.excepthook 19 | threading.excepthook = custom_excepthook 20 | 21 | result = func(*args + (exceptions_caught_in_threads,), **kwargs) 22 | 23 | threading.excepthook = old_excepthook 24 | return result 25 | 26 | return wrapper 27 | -------------------------------------------------------------------------------- /test/launcher/slurm.yaml: -------------------------------------------------------------------------------- 1 | '0': 2 | h_uid: null 3 | host_id: 18446744071562724608 4 | ip_addrs: 5 | - 10.128.0.5:6565 6 | is_primary: false 7 | name: nid00004 8 | num_cpus: 0 9 | physical_mem: 0 10 | shep_cd: '' 11 | state: 4 12 | '1': 13 | h_uid: null 14 | host_id: 18446744071562724864 15 | ip_addrs: 16 | - 10.128.0.6:6565 17 | is_primary: false 18 | name: nid00005 19 | num_cpus: 0 20 | physical_mem: 0 21 | shep_cd: '' 22 | state: 4 23 | '2': 24 | h_uid: null 25 | host_id: 18446744071562725120 26 | ip_addrs: 27 | - 10.128.0.7:6565 28 | is_primary: false 29 | name: nid00006 30 | num_cpus: 0 31 | physical_mem: 0 32 | shep_cd: '' 33 | state: 4 34 | '3': 35 | h_uid: null 36 | host_id: 18446744071562725376 37 | ip_addrs: 38 | - 10.128.0.8:6565 39 | is_primary: false 40 | name: nid00007 41 | num_cpus: 0 42 | physical_mem: 0 43 | shep_cd: '' 44 | state: 4 45 | -------------------------------------------------------------------------------- /test/launcher/slurm_bad.yaml: -------------------------------------------------------------------------------- 1 | '0': 2 | h_uid: null 3 | host_id: 18446744071562724608 4 | ip_addrs: 5 | - 10.128.0.5:6565 6 | is_primary: false 7 | name: nid00004 8 | num_cpus: 0 9 | physical_mem: 0 10 | 
shep_cd: '' 11 | state: 4 12 | '1': 13 | h_uid: null 14 | host_id: 18446744071562724864 15 | ip_addrs: 16 | - 10.128.0.6:6565 17 | is_primary: false 18 | name: nid00005 19 | num_cpus: 0 20 | physical_mem: 0 21 | shep_cd: '' 22 | state: 4 23 | '2': 24 | h_uid: null 25 | host_id: 18446744071562725120 26 | ip_addrs: 27 | - 10.128.0.7:6565 28 | is_primary: false 29 | name: nid00006 30 | num_cpus: 0 31 | physical_mem: 0 32 | shep_cd: '' 33 | state: 4 34 | '3': 35 | h_uid: null 36 | host_id: 18446744071562725376 37 | ip_addrs: 38 | - 10.128.0.8:6565 39 | is_primary: false 40 | junk-key: false 41 | name: nid00007 42 | num_cpus: 0 43 | physical_mem: 0 44 | shep_cd: '' 45 | state: 4 46 | -------------------------------------------------------------------------------- /test/launcher/slurm_primary.yaml: -------------------------------------------------------------------------------- 1 | '0': 2 | h_uid: null 3 | host_id: 18446744071562724608 4 | ip_addrs: 5 | - 10.128.0.5:6565 6 | is_primary: false 7 | name: nid00004 8 | host_name: nid00004 9 | num_cpus: 0 10 | physical_mem: 0 11 | shep_cd: '' 12 | state: 4 13 | '1': 14 | h_uid: null 15 | host_id: 18446744071562724864 16 | ip_addrs: 17 | - 10.128.0.6:6565 18 | is_primary: true 19 | name: nid00005 20 | host_name: nid00005 21 | num_cpus: 0 22 | physical_mem: 0 23 | shep_cd: '' 24 | state: 4 25 | '2': 26 | h_uid: null 27 | host_id: 18446744071562725120 28 | ip_addrs: 29 | - 10.128.0.7:6565 30 | is_primary: false 31 | name: nid00006 32 | host_name: nid00006 33 | num_cpus: 0 34 | physical_mem: 0 35 | shep_cd: '' 36 | state: 4 37 | '3': 38 | h_uid: null 39 | host_id: 18446744071562725376 40 | ip_addrs: 41 | - 10.128.0.8:6565 42 | is_primary: false 43 | name: nid00007 44 | host_name: nid00007 45 | num_cpus: 0 46 | physical_mem: 0 47 | shep_cd: '' 48 | state: 4 49 | -------------------------------------------------------------------------------- /test/launcher_multi/cleanup: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | find /dev/shm -user $USER -exec rm -fr {} \; 4 | find /dev/mqueue -user $USER -exec rm -fr {} \; 5 | -------------------------------------------------------------------------------- /test/launcher_multi/helloworld.py: -------------------------------------------------------------------------------- 1 | import os 2 | import dragon 3 | import multiprocessing as mp 4 | 5 | 6 | def runit(): 7 | print("Hello from Worker", os.uname().nodename) 8 | 9 | 10 | def main(): 11 | mp.set_start_method("dragon") 12 | print("Hello Dragon", os.uname().nodename) 13 | p = mp.Process(target=runit, args=()) 14 | p.start() 15 | p.join() 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /test/launcher_multi/runhello.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../../ && . hack/setup && . 
_env/bin/activate && cd - 4 | 5 | # Remove any cores lying 6 | rm core 7 | 8 | echo $PWD 9 | rm -f *.log 10 | 11 | echo "running test" 12 | dragon -l dragon-file=INFO -l actor-file=INFO ./helloworld.py -------------------------------------------------------------------------------- /test/launcher_multi/runtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "${1}" ]; then 4 | ta=tcp 5 | else 6 | ta=${1} 7 | fi 8 | 9 | unset DRAGON_TRANSPORT_TEST 10 | export DRAGON_TRANSPORT_AGENT=${ta} 11 | 12 | module use $PWD/../../src/modulefiles 13 | module load dragon-dev 14 | 15 | # Remove any cores lying around from the CTI destructor that does 16 | # a double free on an exit from main 17 | rm core 18 | 19 | # Can set up to 3 for increasing verbosity 20 | export MRNET_DEBUG_LEVEL=1 21 | export DRAGON_DEBUG=1 22 | # Setting CTI_DEBUG will turn on logging 23 | export CTI_DEBUG=1 24 | 25 | # Tell CTI where to log to 26 | mkdir -p log 27 | export CTI_LOG_DIR=$PWD/log 28 | 29 | rm -f log/* 30 | rm -f *.log 31 | # dragon_multi is a sym link to src/dragon/dragon_multi_fe.py 32 | dragon_multi ./serial_compute.py 33 | 34 | -------------------------------------------------------------------------------- /test/launcher_multi/runtransporttest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export DRAGON_TRANSPORT_TEST=1 6 | 7 | cd ../../ && . 
hack/multinode_config 8 | cd - 9 | 10 | export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH} 11 | 12 | # Remove any cores lying around from the CTI destructor that does 13 | # a double free on an exit from main 14 | rm core > /dev/null 2>&1 15 | 16 | ulimit -c unlimited 17 | 18 | # Can set up to 3 for increasing verbosity 19 | export MRNET_DEBUG_LEVEL=1 20 | export DRAGON_DEBUG=1 21 | 22 | # Setting CTI_DEBUG will turn on logging 23 | export CTI_DEBUG=1 24 | export HSTA_DEBUG=cqe,general,gw_ch,tx_stats,work_req 25 | 26 | # Tell CTI where to log to 27 | mkdir -p log 28 | export CTI_LOG_DIR=$PWD/log 29 | 30 | rm -f log/* > /dev/null 2>&1 31 | rm -f *.log > /dev/null 2>&1 32 | 33 | srun ./cleanup 34 | 35 | dragon ./transport_test.py 36 | -------------------------------------------------------------------------------- /test/launcher_multi/serial_compute.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import socket 4 | import time 5 | 6 | 7 | def main(): 8 | """Open a file, write, sleep, and exit 9 | 10 | Just a simple test to confirm local services is 11 | capable of bringing up a serial process on a given compute node. 
def get_ip_addresses():
    """Print the interface addresses reported locally and through ``srun``.

    Runs ``python3 -m dragon.transport.ifaddrs`` once directly and once via
    ``srun --nodes=1``, parses each command's stdout as JSON and
    pretty-prints the parsed value together with its Python type.

    Raises:
        RuntimeError: if a command cannot be launched or its stdout is not
            valid JSON. The original exception is attached as ``__cause__``
            and the command's stderr is included in the message, so a
            failing command is not reduced to a bare JSON decode error.
    """
    local_args = ["python3", "-m", "dragon.transport.ifaddrs", "--ip", "--no-loopback", "--up", "--running"]
    srun_args = ["srun", "--nodes=1"]

    # Same query twice: on this host, then on a node allocated by srun.
    for cmd in (local_args, srun_args + local_args):
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as e:
            # e.g. the executable is not on PATH
            raise RuntimeError(f"could not launch {cmd[0]!r}: {e}") from e

        try:
            addrs = json.loads(proc.stdout)
        except ValueError as e:
            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            detail = proc.stderr.decode(errors="replace").strip()
            raise RuntimeError(
                f"{' '.join(cmd)} (exit status {proc.returncode}) did not print valid JSON: {detail or e}"
            ) from e

        print(f"Type: {type(addrs)}")
        pprint(addrs)
256,4,1000000,1 10 | 4,4,4,5 11 | 4,4,1,5 12 | 4,4,2,5 13 | 4,4,4,5 14 | 4,4,8,5 15 | 4,4,16,5 16 | 4,4,32,5 17 | 4,4,64,5 18 | 4,4,128,5 19 | 4,4,256,5 20 | 4,4,1000,5 21 | 4,4,4000,5 22 | 4,4,1000000,5 23 | 1,4,4,5 24 | 2,4,4,5 25 | 4,4,4,5 26 | 8,4,4,5 27 | 16,4,4,5 28 | 32,4,4,5 29 | 64,4,4,5 30 | 128,4,4,10 31 | 200,4,4,10 32 | 256,4,4,10 33 | 507,4,4,5 34 | 4,4,1000000000,5 35 | 4,100,4,5 36 | 4,1,4,5 37 | 4,2,4,5 38 | 4,4,4,5 39 | 4,8,4,5 40 | 4,16,4,5 41 | 4,32,4,5 42 | 4,64,4,5 43 | 4,128,4,5 44 | 4,256,4,5 45 | 4,512,4,5 46 | 4,1024,4,5 47 | 4,2048,4,5 48 | -------------------------------------------------------------------------------- /test/mp_bench/automation_input/basic_pool_test_input.csv: -------------------------------------------------------------------------------- 1 | num_workers,num_iterations 2 | 1,500 3 | 2,75 4 | 4,50 5 | 8,75 6 | 16,50 7 | 20,50 8 | 50,50 9 | 64,75 10 | 128,50 11 | 256,50 12 | 400,5 13 | 505,5 14 | -------------------------------------------------------------------------------- /test/mp_bench/automation_input/basic_process_test_input.csv: -------------------------------------------------------------------------------- 1 | num_workers,num_iterations 2 | 1,1000 3 | 2,100 4 | 4,100 5 | 8,50 6 | 16,50 7 | 20,50 8 | 50,50 9 | 64,200 10 | 128,75 11 | 256,10 12 | 400,15 13 | 508,5 14 | -------------------------------------------------------------------------------- /test/mp_bench/automation_input/pool_invocation_test_input.csv: -------------------------------------------------------------------------------- 1 | num_workers,num_iterations 2 | 1,250 3 | 2,250 4 | 4,100 5 | 8,100 6 | 16,250 7 | 20,250 8 | 32,100 9 | 50,50 10 | 64,50 11 | 128,50 12 | 256,50 13 | 400,15 14 | 505,10 15 | -------------------------------------------------------------------------------- /test/mp_bench/doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | 
-------------------------------------------------------------------------------- /test/mp_bench/doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /test/mp_bench/doc/generalities.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Generalities 3 | ************ 4 | 5 | 6 | Fork vs spawn 7 | ============= 8 | 9 | ``multiprocessing`` supports both 'fork' and 'spawn' methods for starting new interpreters 10 | under Unix. The 'fork' method is **considered harmful** because it violates the 11 | principle of every object knowing who its owner really is and every interpreter 12 | knowing what objects it exclusively owns. 13 | 14 | Therefore all the tests include:: 15 | 16 | multiprocessing.set_start_method('spawn') 17 | 18 | -------------------------------------------------------------------------------- /test/mp_bench/doc/index.rst: -------------------------------------------------------------------------------- 1 | .. Python Multiprocessing Benchmarks documentation master file, created by 2 | sphinx-quickstart on Thu Jan 2 17:09:29 2020. 
#include <stdio.h>
#include <stdlib.h>

/*
 * Report whether the point (x, y) lies strictly inside the unit circle.
 *
 * Usage: a.out X Y
 *
 * X and Y are parsed with atof(). Prints "True" when X*X + Y*Y < 1 and
 * "False" otherwise (no trailing newline), then exits with status 0.
 * When fewer than two arguments are supplied a usage message is written
 * to stderr and the exit status is 1, instead of passing NULL to atof().
 */
int main(int argc, char *argv[]) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s X Y\n", argc > 0 ? argv[0] : "user_executable");
        return 1;
    }

    double x = atof(argv[1]);
    double y = atof(argv[2]);
    double result = (x * x) + (y * y);

    if (result < 1) {
        printf("True");
    } else {
        printf("False");
    }
    return 0;
}
def in_circle(x, y):
    """Return True when the point (x, y) lies strictly inside the unit circle.

    Points exactly on the circle (``x**2 + y**2 == 1``) count as outside.
    """
    return ((x**2) + (y**2)) < 1
self.assertEqual(mp.get_start_method(), "dragon") 15 | 16 | def test_semlock_hack(self): 17 | 18 | rlock = mp.RLock() 19 | 20 | success = rlock.acquire() 21 | self.assertTrue(success) 22 | 23 | self.assertTrue(rlock._semlock._is_mine()) 24 | self.assertTrue(1 == rlock._semlock._count()) 25 | 26 | for i in range(13): 27 | self.assertTrue(rlock.acquire()) 28 | self.assertTrue(i + 2 == rlock._semlock._count()) 29 | 30 | 31 | if __name__ == "__main__": 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /test/multi-node/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm /dev/shm/_dragon_test_pmod_mpi_hello_world* > /dev/null 2>&1 4 | -------------------------------------------------------------------------------- /test/multi-node/test_machine.py: -------------------------------------------------------------------------------- 1 | """This file contains Dragon multi-node acceptance tests for the 2 | `dragon.native.machine` module. The tests scales with the total number of CPUs 3 | reported by the allocation, i.e. it becomes tougher on larger allocations. 
@unittest.skip("Dragon native logging interface not implemented yet (PE-41692)")
class TestDragonNativeMisc(unittest.TestCase):
    """Placeholder tests for the Dragon native logging interface.

    The whole class is skipped until the interface exists (PE-41692). The
    methods take ``self`` so they are valid test methods as soon as the
    skip is removed.
    """

    def test_get_logger(self):
        pass

    def test_log_to_stderr(self):
        pass
should equal our CPU count 17 | self.assertTrue(dragons_cpu_count == the_cpu_count) 18 | 19 | def test_node_list(self): 20 | """Test that we can get a list of the registered nodes""" 21 | 22 | hlist = dragon.globalservices.node.get_list() 23 | self.assertEqual(1, len(hlist)) 24 | 25 | def test_node_class(self): 26 | 27 | mynode = dragon.native.machine.current() 28 | self.assertTrue(mynode.h_uid == host_id()) 29 | 30 | 31 | if __name__ == "__main__": 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /test/pkg/Makefile: -------------------------------------------------------------------------------- 1 | export PYTHON = $(shell which python3 | awk '{if ($$1 == "") { print "python" } else { print "python3"}}') 2 | 3 | default: accepttest 4 | 5 | unittest: 6 | $(PYTHON) test_utils.py -v -f 7 | $(PYTHON) test_channels.py -v -f 8 | 9 | accepttest: 10 | cd multi-node && make all DEF_NODES="-N 2" 11 | -------------------------------------------------------------------------------- /test/pmod/.gitignore: -------------------------------------------------------------------------------- 1 | test_pmod 2 | -------------------------------------------------------------------------------- /test/pmod/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS ?= -fPIC -Wall -Ofast -fomit-frame-pointer 3 | 4 | ifeq ($(DRAGON_INCLUDE_DIR),) 5 | DRAGON_INCLUDE = $(shell dragon-config -o) 6 | else 7 | DRAGON_INCLUDE = -I $(DRAGON_INCLUDE_DIR) 8 | endif 9 | 10 | ifeq ($(DRAGON_LIB_DIR),) 11 | DRAGON_LINK = $(shell dragon-config -l) 12 | else 13 | DRAGON_LINK = -L $(shell echo ${DRAGON_LIB_DIR}) -ldragon 14 | endif 15 | 16 | INCLUDE = $(DRAGON_INCLUDE) -I ../../src/lib 17 | LIBS = $(DRAGON_LINK) 18 | 19 | BIN_FILES = test_pmod 20 | 21 | %.c.o: %.c 22 | $(CC) $(INCLUDE) $(CFLAGS) -c $< -o $@ 23 | 24 | default: build 25 | 26 | build: test_pmod 27 | 28 | test_pmod: test_pmod.c.o 29 
| $(CC) $(INCLUDE) $(CFLAGS) -o test_pmod $< $(LIBS) -ldl 30 | 31 | clean: 32 | rm -rf *.o $(BIN_FILES) 33 | 34 | -------------------------------------------------------------------------------- /test/pmsgq/.gitignore: -------------------------------------------------------------------------------- 1 | test_pmsgq 2 | -------------------------------------------------------------------------------- /test/pmsgq/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -O3 -fomit-frame-pointer -fPIC -Wall 3 | CPPFLAGS = -I ../../src/include 4 | LDFLAGS = -L ../../src/lib 5 | LDLIBS = -lpmsgqueue -lrt -ldragon -ldl 6 | 7 | BIN_FILES = test_pmsgq 8 | 9 | .PHONY: all 10 | all: $(BIN_FILES) 11 | 12 | .PHONY: clean 13 | clean: 14 | $(RM) $(BIN_FILES) *.o 15 | -------------------------------------------------------------------------------- /test/pmsgq/ex.py: -------------------------------------------------------------------------------- 1 | from dragon.launcher.pmsgqueue import PMsgQueue 2 | import dragon.infrastructure.messages as dmsg 3 | import os 4 | 5 | pid = os.getpid() 6 | 7 | sendh = PMsgQueue("/test" + str(pid), write_intent=True) 8 | recvh = PMsgQueue("/test" + str(pid), read_intent=True) 9 | sendh.send("Bad Message") 10 | sendh.reset() 11 | recvh.reset() 12 | 13 | sendh.send(dmsg.SHPingBE(tag=0, shep_cd="", be_cd="", gs_cd="", default_pd="", inf_pd="").serialize()) 14 | 15 | msg = recvh.recv() 16 | msg = dmsg.parse(msg) 17 | print(repr(msg)) 18 | 19 | 20 | recvh.close() 21 | sendh.close(destroy=True) 22 | -------------------------------------------------------------------------------- /test/process/hello.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | print("Hello World") 3 | 4 | 5 | if __name__ == "__main__": 6 | main() 7 | -------------------------------------------------------------------------------- /test/process/mphello.py: 
-------------------------------------------------------------------------------- 1 | import dragon 2 | import multiprocessing as mp 3 | 4 | 5 | def f(): 6 | print("Hello World") 7 | 8 | 9 | def main(): 10 | p = mp.Process(target=f, args=()) 11 | p.start() 12 | p.join() 13 | 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /test/process/nativeprocess.py: -------------------------------------------------------------------------------- 1 | import dragon 2 | import dragon.native.process as nativeproc 3 | import logging 4 | 5 | LOG = logging.getLogger("NativeProcTest :") 6 | 7 | 8 | def main(): 9 | proc = nativeproc.Popen([], "hello.py", stdout=nativeproc.PIPE) 10 | 11 | try: 12 | while True: 13 | output = proc.stdout.recv() 14 | print("From the pipe:", output, end="", flush=True) 15 | except EOFError: 16 | pass 17 | except Exception as ex: 18 | print(f"Unexpected error: {repr(ex)}") 19 | 20 | del proc 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /test/release/hello.py: -------------------------------------------------------------------------------- 1 | print("Hello world") 2 | -------------------------------------------------------------------------------- /test/setup.sh: -------------------------------------------------------------------------------- 1 | if [[ -z "$PYTHONPATH" ]]; then 2 | export PYTHONPATH=$PWD 3 | else 4 | export PYTHONPATH=$PWD:$PWD/integration:$PYTHONPATH 5 | fi 6 | 7 | # part of a hack to skip the monkeypatching 8 | # in ./src/dragon/__init.py__ 9 | export NODRAGON=1 10 | 11 | # to enable access to support scripts needed 12 | # for the shepherd unit tests 13 | export PATH=$PATH:../shepherd 14 | 15 | function clean() { 16 | rm -f ./*.log dump_file 17 | rm -f /dev/shm/* 18 | rm -f .dragon_breakpoints 19 | } 20 | 
-------------------------------------------------------------------------------- /test/shepherd/README.md: -------------------------------------------------------------------------------- 1 | Running Shepherd Tests 2 | ======================== 3 | To run the tests you must go up one directory to the dragon/test 4 | directory. Then 5 | 6 | source ./setup.sh 7 | 8 | This will set up the PYTHONPATH correctly to run 9 | test cases directly from the source directory. Otherwise, 10 | you can build the whole project and run from the build 11 | directory with the proper setup. 12 | 13 | If the Cray Python is not loaded then find the latest Python 14 | 15 | module avail 16 | 17 | and load it 18 | 19 | module load cray-python/3.8.5 20 | 21 | To run a particular test you type something like this from the test directory: 22 | 23 | ./shepherd/single_internal.py SingleInternal.test_process_create 24 | 25 | and to run all the tests you type this from the test directory: 26 | 27 | ./shepherd/single_internal.py SingleInternal 28 | 29 | 30 | -------------------------------------------------------------------------------- /test/shepherd/gs_stub.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def main(): 5 | print(os.environ) 6 | 7 | 8 | if __name__ == "__main__": 9 | main() 10 | -------------------------------------------------------------------------------- /test/shepherd/proc1.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def foo(): 5 | print("Hello World") 6 | 7 | 8 | if __name__ == "__main__": 9 | foo() 10 | -------------------------------------------------------------------------------- /test/shepherd/proc2.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def foo(): 5 | print("Hello World") 6 | print("Doing some more") 7 | 8 | 9 | if __name__ == "__main__": 10 | foo() 11 | 
"""Import-time shim that makes the Dragon sources and test helpers importable.

Importing this module has two side effects:

* the directory holding the running Python interpreter is prepended to
  ``PATH``;
* the test-support directory and the Dragon ``src`` directory are appended
  to ``sys.path`` when they are not already present.
"""

import os
import pathlib
import sys


# Needed by tests spawning a new python process by executable name.
# Assign through os.environ instead of calling os.putenv(): putenv() changes
# the C-level environment but leaves os.environ stale, and os.environ is what
# subprocess consults when it resolves an executable name.
orig_path = os.environ.get("PATH")
py_exe_path = str(pathlib.Path(sys.executable).parent)
# PATH may be unset or empty; avoid "None" concatenation and a dangling separator.
os.environ["PATH"] = py_exe_path + os.pathsep + orig_path if orig_path else py_exe_path


# Needed if no PYTHONPATH is set to point at dragon/src or dragon not installed in site-packages
dragon_path = pathlib.Path(__file__).parent / ".." / "src"
support_path = pathlib.Path(__file__).parent
if not dragon_path.exists():
    # Sometimes needed by fixtures such as launcher/fe_server.py
    dragon_path = pathlib.Path(__file__).parent / ".." / ".." / "src"
    support_path = pathlib.Path(__file__).parent / ".."

for addl_path in (support_path, dragon_path):
    addl_abspath = str(addl_path.absolute())
    if addl_abspath not in sys.path:
        sys.path.append(addl_abspath)
globalservices.test_refcounting import TestGSRefcounting 5 | from globalservices.single_internal import SingleInternal 6 | from globalservices.single_process_msg import SingleProcMsgChannels 7 | from globalservices.process_api import SingleProcAPIChannels 8 | from globalservices.group_api import GSGroupAPI 9 | 10 | if __name__ == "__main__": 11 | unittest.main() 12 | -------------------------------------------------------------------------------- /test/test_infrastructure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import shim_dragon_paths 3 | import unittest 4 | from infrastructure.env_parameter_tests import LaunchParameterTest 5 | from infrastructure.newline_stream_wrapper_test import NewlineStreamWrapperTest 6 | 7 | 8 | if __name__ == "__main__": 9 | unittest.main() 10 | -------------------------------------------------------------------------------- /test/test_launcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import shim_dragon_paths 3 | import unittest 4 | import logging 5 | 6 | from launcher.test_launch_options import LaunchSelectionTest, LaunchOptionsTest 7 | from launcher.test_network_config import FileNetworkConfigTest, SlurmNetworkConfigTest, PbsPalsNetworkConfigTest 8 | from launcher.test_network_config import SSHNetworkConfigTest 9 | from launcher.test_signal_handling import SigIntTest 10 | from launcher.test_frontend_bringup import FrontendBringUpTeardownTest 11 | from launcher.test_backend_bringup import BackendBringUpTeardownTest 12 | from launcher.test_resilient_restart import FrontendRestartTest 13 | 14 | 15 | if __name__ == "__main__": 16 | logging.basicConfig(level=logging.INFO) 17 | unittest.main() 18 | -------------------------------------------------------------------------------- /test/test_mpbridge.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | import shim_dragon_paths 3 | import unittest 4 | 5 | from mpbridge.test_pipe_with_process import SendReceiveTest 6 | from mpbridge.test_mpbridge_context_wait import TestDragonContextWait 7 | from mpbridge.test_mpbridge_basic import TestStartMethod 8 | from mpbridge.test_queue import TestQueue 9 | from mpbridge.test_condition import TestCondition 10 | from mpbridge.test_lock import TestDragonLocks 11 | from mpbridge.test_process import TestMPBridgeProcess 12 | from mpbridge.test_pool import TestMPBridgePool 13 | from mpbridge.test_api import TestMultiprocessingAPI, TestMultiprocessingInternalPatching 14 | from mpbridge.test_array import TestArray 15 | from mpbridge.test_value import TestValue 16 | 17 | # from mpbridge.test_barrier import TestBarrier 18 | 19 | 20 | def setUpModule(): 21 | import dragon 22 | import multiprocessing 23 | 24 | multiprocessing.set_start_method("dragon") 25 | 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /test/test_native.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import shim_dragon_paths 3 | import unittest 4 | import multiprocessing as mp 5 | 6 | from native.test_process_group import TestDragonNativeProcessGroup 7 | from native.test_process import TestDragonNativeProcess 8 | from native.test_semaphore import TestSemaphore 9 | from native.test_machine import TestMachineSingle 10 | from native.test_value import TestValue 11 | from native.test_array import TestArray 12 | from native.test_barrier import TestBarrier 13 | from native.test_event import TestEvent 14 | from native.test_lock import TestLock 15 | from native.test_queue import TestQueue 16 | from native.test_redirection import TestIORedirection 17 | from native.test_ddict import TestDDict 18 | from native.test_ddict_c_driver import TestDDictC 19 | from native.test_ddict_cpp_driver import 
TestDDictCPP 20 | from native.test_pool import TestDragonNativePool 21 | 22 | if __name__ == "__main__": 23 | mp.set_start_method("dragon") 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /test/test_policy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import unittest 3 | from globalservices.test_policy_eval import TestPolicy 4 | from globalservices.test_policy_eval import TestEvaluator 5 | from globalservices.test_policy_eval import TestProcessPolicy 6 | from globalservices.test_policy_eval import TestGroupPolicy 7 | 8 | if __name__ == "__main__": 9 | unittest.main() 10 | -------------------------------------------------------------------------------- /test/test_telemetry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import shim_dragon_paths 3 | import unittest 4 | import multiprocessing as mp 5 | import os 6 | 7 | from telemetry.test_aggregator_app import TestDragonTelemetryAggregatorApp 8 | from telemetry.test_collector import TestDragonTelemetryCollector 9 | from telemetry.test_dragon_server import TestDragonTelemetryDragonServer 10 | from telemetry.test_tsdb_app import TestDragonTelemetryTSDBApp, TestDragonTelemetryTSDBAppErrors 11 | from telemetry.test_tsdb_server import TestDragonTelemetryTSDBServer 12 | from telemetry.test_analysis import TestDragonTelemetryAnalysisClient, TestDragonTelemetryAnalysisServer 13 | import telemetry.telemetry_data 14 | 15 | 16 | if __name__ == "__main__": 17 | mp.set_start_method("dragon") 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /test/test_transport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import shim_dragon_paths 3 | import unittest 4 | 5 | from transport.test_tcp_transport import 
SingleNodeTransportBench 6 | 7 | if __name__ == "__main__": 8 | unittest.main() 9 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import shim_dragon_paths 4 | import unittest 5 | from utils.test_basic_mempool import ( 6 | MemPoolCreateTest, 7 | MemoryPoolAllocTest, 8 | MemoryPoolAllocNoSetupTests, 9 | MemoryPoolAttachTests, 10 | ) 11 | 12 | from utils.test_mempool import MemPoolTest 13 | from utils.test_pyheap import PHeapTest 14 | from utils.test_logging import LoggingTest, TestLogHandler 15 | from utils.test_logging import TestLoggingSubprocesses 16 | 17 | # TEMPORARY HACK: somehow the use of parameterized in these tests 18 | # seem to be interfering with unittests's discovery on import. 19 | # This brutal method solves it for the time being but: 20 | # TODO: reorganize the tests overall 21 | with open("utils/test_locks.py") as fh: 22 | exec(fh.read()) 23 | 24 | if __name__ == "__main__": 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /test/transport/tcp/test_agent.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class AgentTestCase(unittest.IsolatedAsyncioTestCase): 5 | pass 6 | 7 | 8 | # Add dummy tests to avert py3.12 raising no tests run error 9 | class DummyTest(unittest.TestCase): 10 | def test_dummy(self): 11 | self.assertEqual(0, 0) 12 | 13 | 14 | if __name__ == "__main__": 15 | unittest.main() 16 | -------------------------------------------------------------------------------- /test/transport/tcp/test_errno.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from dragon.transport.tcp import errno 4 | 5 | 6 | class ErrnoTestCase(unittest.TestCase): 7 | 8 | def test_get_errno(self): 9 | # TODO Add 
tests for Dragon exceptions that have the `lib_err` attribute 10 | self.assertEqual(errno.get_errno(Exception()), errno.DRAGON_FAILURE) 11 | self.assertEqual(errno.get_errno(ValueError()), errno.DRAGON_INVALID_ARGUMENT) 12 | self.assertEqual(errno.get_errno(NotImplementedError()), errno.DRAGON_NOT_IMPLEMENTED) 13 | self.assertEqual(errno.get_errno(TimeoutError()), errno.DRAGON_TIMEOUT) 14 | 15 | 16 | if __name__ == "__main__": 17 | unittest.main() 18 | -------------------------------------------------------------------------------- /test/transport/tcp/test_gateway_message.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class GatewayMessaageTestCase(unittest.TestCase): 5 | 6 | @unittest.skip 7 | def test_request(self): 8 | raise NotImplementedError 9 | 10 | @unittest.skip 11 | def test_complete(self): 12 | raise NotImplementedError 13 | 14 | 15 | if __name__ == "__main__": 16 | unittest.main() 17 | -------------------------------------------------------------------------------- /test/transport/tcp/test_streams.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class StreamsTestCase(unittest.IsolatedAsyncioTestCase): 5 | 6 | @unittest.skip 7 | async def test_close_writer(self): 8 | raise NotImplementedError 9 | 10 | @unittest.skip 11 | async def test_create_streams(self): 12 | raise NotImplementedError 13 | 14 | @unittest.skip 15 | async def test_create_pipe_streams(self): 16 | raise NotImplementedError 17 | 18 | @unittest.skip 19 | async def test_create_pipe_connections(self): 20 | raise NotImplementedError 21 | 22 | 23 | if __name__ == "__main__": 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /test/transport/tcp/test_task.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class 
TaskTestCase(unittest.IsolatedAsyncioTestCase): 5 | 6 | @unittest.skip 7 | def test_run_forever(self): 8 | raise NotImplementedError 9 | 10 | @unittest.skip 11 | async def test_cancel_all_tasks(self): 12 | raise NotImplementedError 13 | 14 | @unittest.skip 15 | async def test_task_mixin(self): 16 | raise NotImplementedError 17 | 18 | 19 | if __name__ == "__main__": 20 | unittest.main() 21 | -------------------------------------------------------------------------------- /test/transport/tcp/test_util.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class UtilsTestCase(unittest.TestCase): 5 | 6 | @unittest.skip 7 | def test_unget_nowait(self): 8 | raise NotImplementedError 9 | 10 | @unittest.skip 11 | def test_seconds_remaining(self): 12 | raise NotImplementedError 13 | 14 | @unittest.skip 15 | def test_mem_descr_msg(self): 16 | pass 17 | 18 | @unittest.skip 19 | def test_mem_pool_msg(self): 20 | pass 21 | 22 | @unittest.skip 23 | def test_create_msg(self): 24 | pass 25 | 26 | @unittest.skip 27 | def test_attach_channel(self): 28 | raise NotImplementedError 29 | 30 | @unittest.skip 31 | def test_open_handle(self): 32 | raise NotImplementedError 33 | 34 | 35 | if __name__ == "__main__": 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /test/utils/.gitignore: -------------------------------------------------------------------------------- 1 | lock_bench 2 | test_threaded_lock 3 | ulist_test 4 | umap_test 5 | test_attach 6 | test_heap 7 | test_log 8 | test_mem 9 | test_serialized_uid 10 | test_queue 11 | --------------------------------------------------------------------------------