├── test ├── config │ ├── __init__.py │ └── test_training_output.py ├── moddir │ ├── __init__.py │ └── simple_mod.py ├── metrics │ ├── __init__.py │ ├── classification │ │ ├── __init__.py │ │ ├── data │ │ │ └── config_classification.json │ │ └── test_classification_metrics.py │ └── objectdetection │ │ ├── __init__.py │ │ └── data │ │ ├── prc.csv │ │ ├── fscore.csv │ │ ├── ground_truth_no_annos.json │ │ ├── config.json │ │ └── default_metrics_with_formatter.json ├── mmdetection │ └── __init__.py ├── pytorch │ ├── __init__.py │ └── test_utils.py ├── tf_test │ └── __init__.py ├── test_logging.py ├── test_image.py ├── test_utils.py ├── test_transforms.py ├── test_tabular_data_set.py └── test_coco_annotations.py ├── juneberry ├── config │ ├── __init__.py │ ├── plugin.py │ ├── report.py │ └── hashes.py ├── onnx │ ├── __init__.py │ ├── onnx_model_zoo │ │ └── __init__.py │ ├── utils.py │ └── default.py ├── tuning │ ├── __init__.py │ └── reporter.py ├── evaluation │ └── __init__.py ├── metrics │ ├── __init__.py │ ├── classification │ │ ├── __init__.py │ │ ├── sklearn │ │ │ ├── __init__.py │ │ │ └── metrics.py │ │ ├── torchnn │ │ │ ├── __init__.py │ │ │ └── metrics.py │ │ ├── tensorflow │ │ │ └── __init__.py │ │ ├── torchmetrics │ │ │ ├── __init__.py │ │ │ └── metrics.py │ │ └── metrics.py │ └── objectdetection │ │ ├── __init__.py │ │ └── brambox │ │ ├── __init__.py │ │ ├── utils.py │ │ └── format.py ├── pytorch │ ├── __init__.py │ ├── torchvision.py │ ├── privacy │ │ └── model_transforms.py │ └── tabular_dataset.py ├── reporting │ ├── __init__.py │ └── report.py ├── schemas │ ├── __init__.py │ ├── hashes_schema.json │ ├── workspace_schema.json │ ├── tuning_schema.json │ ├── experiment_outline_schema.json │ ├── rules_list_schema.json │ ├── property_inference_attack_schema.json │ ├── tuning_output_schema.json │ ├── coco_anno_schema.json │ ├── evaluation_output_schema.json │ └── experiment_schema.json ├── scripting │ ├── __init__.py │ ├── tuning_sprout.py │ ├── sprout.py │ └── 
training_sprout.py ├── tensorflow │ ├── __init__.py │ ├── evaluation │ │ └── __init__.py │ └── utils.py ├── training │ └── __init__.py ├── transforms │ ├── __init__.py │ ├── tabular.py │ ├── random_shift.py │ ├── random_crop_mirror.py │ ├── random_mirror_flip.py │ └── image.py ├── __init__.py ├── architectures │ └── pytorch │ │ └── torchvision.py ├── detectron2 │ ├── utils.py │ └── transforms.py ├── platform.py └── tensorboard.py ├── MANIFEST.in ├── docs ├── specs │ ├── attack_configuration_specification.md │ └── rules_list_specification.md ├── logo.png ├── zoo.md ├── software_maintenance.md └── building_docker.md ├── docker ├── databricks │ ├── gconf │ │ ├── gmetad.conf │ │ ├── databricks-gmond.conf │ │ └── conf.d │ │ │ └── modpython.conf │ ├── spark-slave-not-active │ ├── ganglia-monitor-not-active │ ├── gmetad-not-active │ ├── ganglia.conf │ ├── build.sh │ └── monit ├── container_start.sh ├── build.sh ├── set_user.sh └── run_container_minimal.sh ├── .github ├── workflows │ ├── docker-retag-cpudev.yml │ ├── docker-retag-cudadev-latest.yml │ ├── docker-retag-cudadev.yml │ ├── docker-retag-cudabricks.yml │ ├── docker-image-cpudev.yml │ ├── docker-image-cudadev.yml │ ├── docker-image-cudabricks-base.yml │ └── docker-image-cudabricks.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── requirements-cpu.txt ├── requirements.txt ├── .gitignore ├── CONTRIBUTORS.md ├── scripts ├── setup_lab.py ├── dry_run_all_models.py ├── predictions_to_coco.py ├── merge_predictions.py ├── reformat_predictions.py ├── draw_boxes_from_anno_file.py ├── coco_image_use.py └── model_transform.py ├── README.md ├── LICENSE.txt ├── setup.py └── bin ├── jb_run_plugin └── jb_clean_experiment_evals /test/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/moddir/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/tuning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/mmdetection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/tf_test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/pytorch/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/reporting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/scripting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/onnx/onnx_model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/tensorflow/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/metrics/classification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/test/metrics/objectdetection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/metrics/classification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/metrics/objectdetection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include juneberry/schemas * 2 | -------------------------------------------------------------------------------- /docs/specs/attack_configuration_specification.md: -------------------------------------------------------------------------------- 1 | TODO -------------------------------------------------------------------------------- /juneberry/metrics/classification/sklearn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/metrics/classification/torchnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/metrics/objectdetection/brambox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /juneberry/metrics/classification/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/juneberry/metrics/classification/torchmetrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-sei/juneberry/HEAD/docs/logo.png -------------------------------------------------------------------------------- /juneberry/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | __version__ = "0.5.1" 4 | -------------------------------------------------------------------------------- /docker/databricks/gconf/gmetad.conf: -------------------------------------------------------------------------------- 1 | data_source "cluster" localhost 2 | setuid_username "ganglia" 3 | xml_port 8651 -------------------------------------------------------------------------------- /juneberry/schemas/hashes_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "description": "A configuration storing model hashes", 4 | "properties": { 5 | "model_architecture": { "type" : "string" } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /docker/databricks/spark-slave-not-active: -------------------------------------------------------------------------------- 1 | \n\ 2 | check process spark-slave with pidfile /tmp/spark-root-org.apache.spark.deploy.worker.Worker-1.pid\n\ 3 | start program = "/databricks/spark/scripts/restart-workers"\n\ 4 | stop program = "/databricks/spark/scripts/kill_worker.sh"\n\ 5 | -------------------------------------------------------------------------------- /test/metrics/objectdetection/data/prc.csv: -------------------------------------------------------------------------------- 1 | 
precision,recall,confidence 2 | 0.3333333333333333,0.06666666666666667,0.9 3 | 0.6,0.2,0.8 4 | 0.6666666666666666,0.26666666666666666,0.75 5 | 0.625,0.3333333333333333,0.7 6 | 0.6,0.4,0.6 7 | 0.4666666666666667,0.4666666666666667,0.5 8 | 0.3684210526315789,0.4666666666666667,0.4 9 | -------------------------------------------------------------------------------- /docker/databricks/ganglia-monitor-not-active: -------------------------------------------------------------------------------- 1 | \n\ 2 | check process ganglia-monitor with pidfile /var/run/ganglia-monitor.pid\n\ 3 | start program = "/usr/sbin/service ganglia-monitor start"\n\ 4 | stop program = "/usr/sbin/service ganglia-monitor stop"\n\ 5 | if memory usage > 500 MB for 3 cycles then restart\n\ 6 | -------------------------------------------------------------------------------- /test/metrics/objectdetection/data/fscore.csv: -------------------------------------------------------------------------------- 1 | f1,recall,confidence 2 | 0.1111111111111111,0.06666666666666667,0.9 3 | 0.3,0.2,0.8 4 | 0.3809523809523809,0.26666666666666666,0.75 5 | 0.43478260869565216,0.3333333333333333,0.7 6 | 0.48,0.4,0.6 7 | 0.4666666666666667,0.4666666666666667,0.5 8 | 0.4117647058823529,0.4666666666666667,0.4 9 | -------------------------------------------------------------------------------- /docker/container_start.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # Setup juneberry 4 | echo "Installing Juneberry..." 5 | pip install -e /juneberry 6 | 7 | # Add in the bash completion 8 | source /juneberry/scripts/juneberry_completion.sh 9 | 10 | # Install any workspace code 11 | if [ -e "./setup.py" ]; then 12 | echo "Installing workspace..." 13 | pip install -e . 
14 | fi 15 | 16 | -------------------------------------------------------------------------------- /docker/databricks/gconf/databricks-gmond.conf: -------------------------------------------------------------------------------- 1 | cluster { 2 | name = "cluster" 3 | owner = "unspecified" 4 | latlong = "unspecified" 5 | url = "unspecified" 6 | } 7 | 8 | udp_send_channel { 9 | host = 10.126.246.34 10 | port = 8649 11 | ttl = 1 12 | } 13 | 14 | /* This is set for gmond running on the driver to receive metrics from other gmonds, and is not 15 | in use for gmonds running on workers. */ 16 | udp_recv_channel { 17 | port = 8649 18 | } -------------------------------------------------------------------------------- /docker/databricks/gconf/conf.d/modpython.conf: -------------------------------------------------------------------------------- 1 | /* 2 | params - path to the directory where mod_python 3 | should look for python metric modules 4 | 5 | the "pyconf" files in the include directory below 6 | will be scanned for configurations for those modules 7 | */ 8 | modules { 9 | module { 10 | name = "python_module" 11 | path = "/usr/lib/ganglia/modpython.so" 12 | params = "/usr/lib/ganglia/python_modules" 13 | } 14 | } 15 | 16 | include ('/etc/ganglia/conf.d/*.pyconf') 17 | -------------------------------------------------------------------------------- /docker/databricks/gmetad-not-active: -------------------------------------------------------------------------------- 1 | \n\ 2 | check process gmetad with pidfile /var/run/gmetad.pid\n\ 3 | start program = "/usr/sbin/service gmetad start"\n\ 4 | stop program = "/usr/sbin/service gmetad stop"\n\ 5 | if memory usage > 500 MB for 3 cycles then restart\n\ 6 | \n\ 7 | check process apache2 with pidfile /var/run/apache2/apache2.pid\n\ 8 | start program = "/usr/sbin/service apache2 start"\n\ 9 | stop program = "/usr/sbin/service apache2 stop"\n\ 10 | if memory usage > 500 MB for 3 cycles then restart\n\ 11 | 
-------------------------------------------------------------------------------- /.github/workflows/docker-retag-cpudev.yml: -------------------------------------------------------------------------------- 1 | name: Retag cpudev-dev to cpudev 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Retag cpudev-dev to cpudev 15 | uses: tinact/docker.image-retag@master 16 | with: 17 | image_name: cmusei/juneberry 18 | image_old_tag: cpudev-dev 19 | image_new_tag: cpudev 20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }} 21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }} 22 | -------------------------------------------------------------------------------- /.github/workflows/docker-retag-cudadev-latest.yml: -------------------------------------------------------------------------------- 1 | name: Retag cudadev to latest 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Retag cudadev to latest 15 | uses: tinact/docker.image-retag@master 16 | with: 17 | image_name: cmusei/juneberry 18 | image_old_tag: cudadev 19 | image_new_tag: latest 20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }} 21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }} 22 | -------------------------------------------------------------------------------- /.github/workflows/docker-retag-cudadev.yml: -------------------------------------------------------------------------------- 1 | name: Retag cudadev-dev to cudadev 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Retag cudadev-dev to cudadev 15 | uses: tinact/docker.image-retag@master 16 | with: 17 | image_name: cmusei/juneberry 18 | image_old_tag: cudadev-dev 19 | image_new_tag: cudadev 20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }} 21 | registry_password: ${{ 
secrets.DOCKERHUB_PASSWORD }} 22 | -------------------------------------------------------------------------------- /requirements-cpu.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.7.0 2 | tensorflow-datasets==4.4.0 3 | doit==0.33.1 4 | numpy==1.19.5 5 | pycocotools==2.0.2 6 | matplotlib==3.4.3 7 | Pillow==8.3.2 8 | prodict==0.8.18 9 | hjson==3.0.2 10 | jsonschema==4.1.0 11 | sklearn==0.0 12 | tensorboard==2.6.0 13 | torch==1.8.0 14 | torchvision==0.9.0 15 | torch-summary==1.4.5 16 | pandas==1.3.3 17 | brambox==3.2.0 18 | PyYAML==5.4.1 19 | natsort==7.1.1 20 | opacus==0.14.0 21 | protobuf==3.16.0 22 | onnx==1.10.1 23 | onnxruntime==1.10.0 24 | tf2onnx==1.9.3 25 | opencv-python==4.5.5.62 26 | tqdm==4.62.3 27 | pytest==6.2.5 28 | pylint==2.10.2 29 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.7.0 2 | tensorflow-datasets==4.4.0 3 | doit==0.33.1 4 | numpy==1.19.5 5 | pycocotools==2.0.2 6 | matplotlib==3.4.3 7 | Pillow==8.3.2 8 | prodict==0.8.18 9 | hjson==3.0.2 10 | jsonschema==4.1.0 11 | sklearn==0.0 12 | tensorboard==2.6.0 13 | torch==1.8.0 14 | torchvision==0.9.0 15 | torch-summary==1.4.5 16 | pandas==1.3.3 17 | brambox==3.2.0 18 | PyYAML==5.4.1 19 | natsort==7.1.1 20 | opacus==0.14.0 21 | protobuf==3.16.0 22 | onnx==1.10.1 23 | onnxruntime-gpu==1.10.0 24 | tf2onnx==1.9.3 25 | opencv-python==4.5.5.62 26 | tqdm==4.62.3 27 | pytest==6.2.5 28 | pylint==2.10.2 29 | -------------------------------------------------------------------------------- /.github/workflows/docker-retag-cudabricks.yml: -------------------------------------------------------------------------------- 1 | name: Retag cudabricks-dev to cudabricks 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Retag 
cudabricks-dev to cudabricks 15 | uses: tinact/docker.image-retag@master 16 | with: 17 | image_name: cmusei/juneberry 18 | image_old_tag: cudabricks-dev 19 | image_new_tag: cudabricks 20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }} 21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }} 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.log 3 | *.png 4 | *.pyc 5 | *.pt 6 | *.swp 7 | .DS_Store 8 | .idea/ 9 | .vscode/ 10 | .venv/ 11 | .dvc/ 12 | Juneberry.egg-info 13 | .coverage 14 | htmlcov/ 15 | 16 | # Ignore models except the config and DVC files 17 | models/*/* 18 | !models/**/config.json 19 | !models/**/*.dvc 20 | 21 | experiments/*/* 22 | !experiments/**/*.json 23 | !experiments/**/*.dvc 24 | experiments/**/rules.json 25 | 26 | 27 | # The checkpoints directory will contain checkpoints from the container 28 | checkpoints/ 29 | 30 | # Ignore some tool support directories 31 | scripts/resnet-summaries/latest 32 | 33 | # A place to hide things 34 | nothingtoseehere 35 | 36 | # We don't want doit stuff 37 | **/.doit.db* 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. Here are some of the things we'd like to know. 12 | 13 | **To Reproduce** 14 | - Which tool (train, evaluate)? 15 | - What model architecture? (Add config file?) 16 | - What dataset? (Add config file?) 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Screenshots or log output** 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | **Platform** 25 | - Which container version? 26 | - Does it happen in CPU or GPU mode only? If GPU, how many GPUs? 27 | 28 | **Additional context** 29 | Add any other context about the problem here. 
30 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Version 0.4 Contributors 2 | 3 | * Andrew Mellinger - SEI/CMU 4 | * Annika Horgan - SEI/CMU 5 | * Anusha Sinha - SEI/CMU 6 | * Jay Palat - SEI/CMU 7 | * Matthew Churilla - SEI/CMU 8 | * Michael Vincente - SEI/CMU 9 | * Nathan VanHoudnos - SEI/CMU 10 | * Nick Winski - SEI/CMU 11 | * Robert Beveridge - SEI/CMU 12 | * Todd Loizes - SEI/CMU 13 | * Violet Turri - SEI/CMU 14 | * William R. Nichols - SEI/CMU 15 | 16 | # Version 0.5 Contributors 17 | 18 | * Andrew Mellinger - SEI/CMU 19 | * Anusha Sinha - SEI/CMU 20 | * Bill Shaw - SEI/CMU 21 | * Bryan Brown - SEI/CMU 22 | * Daniel Justice - SEI/CMU 23 | * Dominique Mittermeier - SEI/CMU 24 | * Hayden Moore - SEI/CMU 25 | * John Zucca - SEI/CMU 26 | * Jon Helland - SEI/CMU 27 | * Jordan Widjaja - SEI/CMU 28 | * Matthew Churilla - SEI/CMU 29 | * Nathan VanHoudnos - SEI/CMU 30 | * Nick Winski - SEI/CMU 31 | * Shannon Gallagher - SEI/CMU 32 | * William R. 
Nichols - SEI/CMU 33 | -------------------------------------------------------------------------------- /test/metrics/objectdetection/data/ground_truth_no_annos.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": [ 3 | ], 4 | "images": [ 5 | { 6 | "file_name": "img_1.jpg", 7 | "coco_url": "", 8 | "height": 200, 9 | "width": 200, 10 | "id": "1" 11 | }, 12 | { 13 | "file_name": "img_2.jpg", 14 | "coco_url": "", 15 | "height": 200, 16 | "width": 200, 17 | "id": "2" 18 | } 19 | ], 20 | "categories": [ 21 | { 22 | "supercategory": "sc_1", 23 | "id": 1, 24 | "name": "class_1" 25 | }, 26 | { 27 | "supercategory": "sc_1", 28 | "id": 2, 29 | "name": "class_2" 30 | } 31 | ], 32 | "info": { 33 | "description": "Metrics Unit Test (no annos)", 34 | "year": 2021, 35 | "date_created": "2021/11/11" 36 | } 37 | } -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | if [ $# -lt 1 ]; then 4 | echo "This script requires one argument, the part BEFORE the '.Dockerfile'" 5 | echo "e.g. 'cudadev' or 'cpudev'" 6 | echo "Optionally a docker tag can be provided as a second argument otherwise 'dev' is used." 
7 | exit -1 8 | fi 9 | 10 | REV="dev" 11 | if [ $# -eq 2 ]; then 12 | REV=${2} 13 | fi 14 | 15 | TARGET_TAG="juneberry/${1}:${REV}" 16 | SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 17 | DOCKERFILE=${SCRIPT_DIR}/${1}.Dockerfile 18 | 19 | echo "Building: ${DOCKERFILE} into ${TARGET_TAG}" 20 | 21 | docker build --no-cache \ 22 | --build-arg HTTP_PROXY=${HTTP_PROXY} \ 23 | --build-arg http_proxy=${http_proxy} \ 24 | --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ 25 | --build-arg https_proxy=${https_proxy} \ 26 | --build-arg NO_PROXY=${NO_PROXY} \ 27 | --build-arg no_proxy=${no_proxy} \ 28 | --network=host -f "${DOCKERFILE}" -t ${TARGET_TAG} ${SCRIPT_DIR} 29 | -------------------------------------------------------------------------------- /test/metrics/objectdetection/data/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "evaluation_metrics": [ 3 | { 4 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Summary", 5 | "kwargs": { 6 | "iou_threshold": 0.5, 7 | "tp_threshold": 0.8 8 | } 9 | }, 10 | { 11 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Coco", 12 | "kwargs": { 13 | "iou_threshold": 0.5, 14 | "max_det": 100, 15 | "tqdm": false 16 | } 17 | }, 18 | { 19 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Tide", 20 | "kwargs": { 21 | "pos_thresh": 0.5, 22 | "bg_thresh": 0.5, 23 | "max_det": 100, 24 | "area_range_min": 0, 25 | "area_range_max": 100000, 26 | "tqdm": false 27 | } 28 | } 29 | ] 30 | } -------------------------------------------------------------------------------- /juneberry/schemas/workspace_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "description": "A configuration for the workspace", 4 | "properties": { 5 | "profiles": { 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "include": 
{"type": "string"}, 11 | "model": {"type": "string"}, 12 | "name": {"type": "string"}, 13 | "profile": { 14 | "type": "object", 15 | "properties": { 16 | "max_gpus": {"type": "integer"}, 17 | "no_paging": {"type": "boolean"}, 18 | "num_gpus": {"type": "integer"}, 19 | "num_workers": {"type": "integer"} 20 | } 21 | } 22 | }, 23 | "required": ["model", "name", "profile"] 24 | } 25 | } 26 | } 27 | } -------------------------------------------------------------------------------- /docker/databricks/ganglia.conf: -------------------------------------------------------------------------------- 1 | 2 | ServerAdmin webmaster@localhost 3 | 4 | # Show Ganglia UI by default 5 | DocumentRoot /usr/share/ganglia-webfrontend 6 | 7 | # Available loglevels: trace8, ..., trace1, debug, info, notice, warn, 8 | # error, crit, alert, emerg. 9 | # It is also possible to configure the loglevel for particular 10 | # modules, e.g. 11 | #LogLevel info ssl:warn 12 | 13 | ErrorLog ${APACHE_LOG_DIR}/error.log 14 | CustomLog ${APACHE_LOG_DIR}/access.log combined 15 | 16 | # For most configuration files from conf-available/, which are 17 | # enabled or disabled at a global level, it is possible to 18 | # include a line for only one particular virtual host. For example the 19 | # following line enables the CGI configuration for this host only 20 | # after it has been globally disabled with "a2disconf". 21 | #Include conf-available/serve-cgi-bin.conf 22 | 23 | 24 | # vim: syntax=apache ts=4 sw=4 sts=4 sr noet 25 | -------------------------------------------------------------------------------- /docker/databricks/build.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | REV="dev" 4 | if [ $# -eq 1 ]; then 5 | REV=${1} 6 | fi 7 | 8 | # SCRIPT DIR is our context dir. 
9 | SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 10 | 11 | TARGET_TAG="juneberry/cudabricks-base:${REV}" 12 | 13 | echo "Building: Base into ${TARGET_TAG}" 14 | 15 | docker build --no-cache \ 16 | --build-arg HTTP_PROXY=${HTTP_PROXY} \ 17 | --build-arg http_proxy=${http_proxy} \ 18 | --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ 19 | --build-arg https_proxy=${https_proxy} \ 20 | --build-arg NO_PROXY=${NO_PROXY} \ 21 | --build-arg no_proxy=${no_proxy} \ 22 | --network=host -f base.Dockerfile -t ${TARGET_TAG} ${SCRIPT_DIR} 23 | 24 | TARGET_TAG="juneberry/cudabricks:${REV}" 25 | 26 | echo "Building: Ganglia into ${TARGET_TAG}" 27 | 28 | docker build --no-cache \ 29 | --build-arg HTTP_PROXY=${HTTP_PROXY} \ 30 | --build-arg http_proxy=${http_proxy} \ 31 | --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ 32 | --build-arg https_proxy=${https_proxy} \ 33 | --build-arg NO_PROXY=${NO_PROXY} \ 34 | --build-arg no_proxy=${no_proxy} \ 35 | --network=host -f ganglia.Dockerfile -t ${TARGET_TAG} ${SCRIPT_DIR} 36 | -------------------------------------------------------------------------------- /test/metrics/objectdetection/data/default_metrics_with_formatter.json: -------------------------------------------------------------------------------- 1 | { 2 | "evaluation_metrics": [ 3 | { 4 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Coco", 5 | "kwargs": { 6 | "iou_threshold": 0.5, 7 | "max_det": 100, 8 | "tqdm": false 9 | } 10 | }, 11 | { 12 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Summary", 13 | "kwargs": { 14 | "iou_threshold": 0.5, 15 | "tp_threshold": 0.8 16 | } 17 | }, 18 | { 19 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Tide", 20 | "kwargs": { 21 | "pos_thresh": 0.5, 22 | "bg_thresh": 0.5, 23 | "max_det": 100, 24 | "area_range_min": 0, 25 | "area_range_max": 100000, 26 | "tqdm": false 27 | } 28 | } 29 | ], 30 | "evaluation_metrics_formatter": { 31 | "fqcn": 
"juneberry.metrics.objectdetection.brambox.format.DefaultFormatter", 32 | "kwargs": { 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /.github/workflows/docker-image-cpudev.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI CPUDev 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Check base machine storage 15 | run: df -h / 16 | - 17 | name: Delete unused packages and tools 18 | run: sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell && 19 | sudo apt-get autoremove -y && 20 | sudo apt-get clean && 21 | rm -rf /usr/share/dotnet/ 22 | rm -rf /opt/hostedtoolcache 23 | - 24 | name: Check base machine storage (after delete) 25 | run: df -h / 26 | - 27 | name: Checkout 28 | uses: actions/checkout@v2 29 | - 30 | name: Login to DockerHub 31 | uses: docker/login-action@v1 32 | with: 33 | username: ${{ secrets.DOCKERHUB_USERNAME }} 34 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 35 | - 36 | name: Build and push 37 | uses: docker/build-push-action@v2 38 | with: 39 | context: ./docker 40 | push: true 41 | file: docker/cpudev.Dockerfile 42 | tags: cmusei/juneberry:cpudev-dev 43 | -------------------------------------------------------------------------------- /.github/workflows/docker-image-cudadev.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI CUDADev 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Check base machine storage 15 | run: df -h / 16 | - 17 | name: Delete unused packages and tools 18 | run: sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell && 19 | sudo apt-get autoremove -y && 20 | sudo apt-get clean && 21 | rm -rf /usr/share/dotnet/ 22 | rm -rf 
/opt/hostedtoolcache 23 | - 24 | name: Check base machine storage (after delete) 25 | run: df -h / 26 | - 27 | name: Checkout 28 | uses: actions/checkout@v2 29 | - 30 | name: Login to DockerHub 31 | uses: docker/login-action@v1 32 | with: 33 | username: ${{ secrets.DOCKERHUB_USERNAME }} 34 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 35 | - 36 | name: Build and push 37 | uses: docker/build-push-action@v2 38 | with: 39 | context: ./docker 40 | push: true 41 | file: docker/cudadev.Dockerfile 42 | tags: cmusei/juneberry:cudadev-dev 43 | -------------------------------------------------------------------------------- /.github/workflows/docker-image-cudabricks-base.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI CUDABricksBase 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Check base machine storage 15 | run: df -h / 16 | - 17 | name: Delete unused packages and tools 18 | run: sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell && 19 | sudo apt-get autoremove -y && 20 | sudo apt-get clean && 21 | rm -rf /usr/share/dotnet/ 22 | rm -rf /opt/hostedtoolcache 23 | - 24 | name: Check base machine storage (after delete) 25 | run: df -h / 26 | - 27 | name: Checkout 28 | uses: actions/checkout@v2 29 | - 30 | name: Login to DockerHub 31 | uses: docker/login-action@v1 32 | with: 33 | username: ${{ secrets.DOCKERHUB_USERNAME }} 34 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 35 | - 36 | name: Build and push 37 | uses: docker/build-push-action@v2 38 | with: 39 | context: ./docker/databricks 40 | push: true 41 | file: docker/databricks/base.Dockerfile 42 | tags: cmusei/juneberry:cudabricks-base 43 | -------------------------------------------------------------------------------- /.github/workflows/docker-image-cudabricks.yml: 
-------------------------------------------------------------------------------- 1 | name: Docker Image CI CUDABricks 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - 14 | name: Check base machine storage 15 | run: df -h / 16 | - 17 | name: Delete unused packages and tools 18 | run: sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell && 19 | sudo apt-get autoremove -y && 20 | sudo apt-get clean && 21 | rm -rf /usr/share/dotnet/ 22 | rm -rf /opt/hostedtoolcache 23 | - 24 | name: Check base machine storage (after delete) 25 | run: df -h / 26 | - 27 | name: Checkout 28 | uses: actions/checkout@v2 29 | - 30 | name: Login to DockerHub 31 | uses: docker/login-action@v1 32 | with: 33 | username: ${{ secrets.DOCKERHUB_USERNAME }} 34 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 35 | - 36 | name: Build and push 37 | uses: docker/build-push-action@v2 38 | with: 39 | context: ./docker/databricks 40 | push: true 41 | file: docker/databricks/ganglia.Dockerfile 42 | tags: cmusei/juneberry:cudabricks-dev 43 | -------------------------------------------------------------------------------- /juneberry/config/plugin.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. 
CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | from prodict import Prodict 26 | 27 | 28 | class Plugin(Prodict): 29 | fqcn: str 30 | kwargs: Prodict 31 | -------------------------------------------------------------------------------- /juneberry/onnx/utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 
15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | from juneberry.platform import PlatformDefinitions 26 | 27 | 28 | class ONNXPlatformDefinitions(PlatformDefinitions): 29 | def get_model_filename(self): 30 | """ :return: The name of the model file that the trainer saves and what evaluators should load""" 31 | return "model.onnx" 32 | -------------------------------------------------------------------------------- /docker/set_user.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # This convenience script is designed to be executed INSIDE the juneberry container 4 | # to create a user and group with IDs that work well with the mounted volumes. This script 5 | # requires that various user information is passed into the container. This user information 6 | # is: 7 | # - user name as USER_NAME 8 | # - user id as USER_ID 9 | # - user group id as USER_GID 10 | # 11 | # For example, using these options to docker run: 12 | # 13 | # -e USER_NAME=${USER} -e USER_ID=`id -u ${USER}` -e USER_GID=`id -g ${USER}` 14 | # 15 | # Or via this line in enter_juneberry_container: 16 | # 17 | # ENVS_USER="-e USER_NAME=${USER} -e USER_ID=`id -u ${USER}` -e USER_GID=`id -g ${USER}`" 18 | # 19 | # As this is all temporary, this script needs to be executed on every new container instantiation. 20 | 21 | # Add the group and user if all three of these are set.
22 | if test -n "${USER_NAME}" && test -n "${USER_ID}" && test -n "${USER_GID}"; then 23 | groupadd -g ${USER_GID} domain_users 24 | useradd -m -s /bin/bash -u ${USER_ID} -g ${USER_GID} -G root ${USER_NAME} 25 | 26 | # Add the default path to the bashrc. In the case of the nvidia containers, they have 27 | # manually set the path via docker file not in a global bashrc or profile, 28 | # so we have no way to source that in our bashrc. 29 | echo "export PATH=${PATH}" >> /home/${USER_NAME}/.bashrc 30 | 31 | # Provide sudo access to mimic a normal system where the user has sudo access. 32 | echo "${USER_NAME} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers 33 | 34 | # Now, set us to that user. 35 | su ${USER_NAME} 36 | else 37 | echo "Not setting user because USER_NAME, USER_ID, or USER_GID not set." 38 | fi 39 | -------------------------------------------------------------------------------- /test/metrics/classification/data/config_classification.json: -------------------------------------------------------------------------------- 1 | { 2 | "metrics": [ 3 | { 4 | "fqcn": "juneberry.metrics.classification.sklearn.metrics.Metrics", 5 | "kwargs": { 6 | "fqn": "sklearn.metrics.accuracy_score", 7 | "name": "accuracy_score", 8 | "kwargs": { 9 | "normalize": false 10 | } 11 | } 12 | }, 13 | { 14 | "fqcn": "juneberry.metrics.classification.torchmetrics.metrics.Metrics", 15 | "kwargs": { 16 | "fqn": "torchmetrics.functional.accuracy", 17 | "name": "func_accuracy", 18 | "kwargs": { 19 | "top_k": 2 20 | } 21 | } 22 | }, 23 | { 24 | "fqcn": "juneberry.metrics.classification.torchmetrics.metrics.Metrics", 25 | "kwargs": { 26 | "fqn": "torchmetrics.Accuracy", 27 | "name": "obj_accuracy", 28 | "kwargs": { 29 | "top_k": 2 30 | } 31 | } 32 | }, 33 | { 34 | "fqcn": "juneberry.metrics.classification.torchnn.metrics.Metrics", 35 | "kwargs": { 36 | "fqn": "torch.nn.CrossEntropyLoss", 37 | "name": "loss", 38 | "kwargs": {} 39 | } 40 | }, 41 | { 42 | "fqcn": 
"juneberry.metrics.classification.tensorflow.metrics.Metrics", 43 | "kwargs": { 44 | "fqn": "tensorflow.keras.metrics.Accuracy", 45 | "name": "tf_accuracy", 46 | "kwargs": { 47 | "standalone": true 48 | } 49 | } 50 | }, 51 | { 52 | "fqcn": "juneberry.metrics.classification.tensorflow.metrics.Metrics", 53 | "kwargs": { 54 | "fqn": "tensorflow.keras.metrics.binary_accuracy", 55 | "name": "tf_binary_accuracy", 56 | "kwargs": { 57 | "standalone": true 58 | } 59 | } 60 | } 61 | ] 62 | } -------------------------------------------------------------------------------- /juneberry/pytorch/torchvision.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import importlib 26 | 27 | 28 | class Wrapper: 29 | """ 30 | Basic wrapper for torchvision model classes where class_name 31 | specifies the class name and class_args is a dictionary of 32 | args to be passed (expanded) to the constructor. 33 | """ 34 | 35 | def __call__(self, class_name, class_args, num_classes): 36 | mod = importlib.import_module('torchvision.models') 37 | my_class = getattr(mod, class_name) 38 | return my_class(**class_args) 39 | -------------------------------------------------------------------------------- /docs/zoo.md: -------------------------------------------------------------------------------- 1 | Model Zoo Overview 2 | ========== 3 | 4 | # Introduction 5 | 6 | Juneberry supports the idea of a model zoo, which contains config files and pre-trained models that 7 | can be downloaded and used in Juneberry. Model zoo files are stored on remote servers in a hierarchy 8 | similar to how the 'models' directory is organized. Consider the following path to a zoo file: 9 | 10 | "https://juneberry.com/models/my-model/resnet18.zip" 11 | 12 | - https://juneberry.com/models - The base url of the server where the models are stored. 13 | - my-model/resnet18.zip - A zip containing the model named "my-model/resnet18". 14 | 15 | # Packaging a model 16 | 17 | The model zip file contains any necessary files required to share the model. These typically include the 18 | following: 19 | 20 | - config.json 21 | - model.pt or model.h5 22 | - (optional) hashes.json 23 | 24 | When provided, the hashes.json file can confirm which model architecture was used to generate 25 | the model. If the model_architecture hash embedded inside the hashes.json does NOT match the hash of 26 | locally constructed model architecture summary, then the model will not be loaded from the zoo and an 27 | error is generated.
During training, a "hashes-latest.json" file will be produced which contains the 28 | model_architecture hash that was used to train the model. 29 | 30 | A convenience tool is provided which packages up the zip file. To invoke the tool, specify the model and 31 | a directory representing a staging area for zip files to be uploaded to the zoo. The tool expects to be run from 32 | the root of the workspace. Consider the following command: 33 | 34 | `python -m juneberry.zoo my-model/resnet18 ./zoo-staging` 35 | 36 | This command would create the file "./zoo-staging/my-model/resnet18.zip" containing the model's config file, the 37 | model.pt (assuming it is a PyTorch model), and a hashes.json file (if one exists) or a copy 38 | of "hashes-latest.json", if one exists, renamed to "hashes.json". -------------------------------------------------------------------------------- /juneberry/transforms/tabular.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 
15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | Simple transforms for tabular data. 27 | """ 28 | 29 | 30 | class RemoveColumns: 31 | def __init__(self, indexes: list): 32 | self.reverse_indexes = indexes.copy() 33 | self.reverse_indexes.sort(reverse=True) 34 | 35 | def __call__(self, row): 36 | """ 37 | Transformation function that removes the value from the row. 38 | """ 39 | # TODO: Probably a better way to do this in pandas or something else via a "view" wrapper and just 40 | # remap indices. 41 | row_copy = row.copy() 42 | for idx in self.reverse_indexes: 43 | del row_copy[idx] 44 | return row_copy 45 | -------------------------------------------------------------------------------- /juneberry/pytorch/privacy/model_transforms.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. 
CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import logging 26 | 27 | from opacus.dp_model_inspector import DPModelInspector 28 | from opacus.utils import module_modification 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | class ConvertBatchnormModules: 34 | """ 35 | Transform used to convert batchnorms for use in opacus 36 | """ 37 | 38 | def __call__(self, model): 39 | logger.info(f"Attempting conversion of batchnorm modules.") 40 | model = module_modification.convert_batchnorm_modules(model) 41 | inspector = DPModelInspector() 42 | logger.info(f"... Is converted model valid for DPSGD?: {inspector.validate(model)}") 43 | 44 | return model 45 | -------------------------------------------------------------------------------- /juneberry/tuning/reporter.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS.
CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import logging 26 | from typing import Dict, List 27 | 28 | from ray.tune import CLIReporter 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | class CustomReporter(CLIReporter): 34 | """ 35 | This class is responsible for altering the logging behavior in Ray Tune. By default, Ray Tune just 36 | prints its log messages. This CustomReporter overrides that print statement and changes it to a 37 | logger message, so a record of the message will be maintained inside Juneberry log files. 
38 | """ 39 | 40 | def report(self, trials: List, done: bool, *sys_info: Dict): 41 | logger.info(f"Status Message from Ray Tune:\n{self._progress_str(trials, done, *sys_info)}") 42 | -------------------------------------------------------------------------------- /juneberry/schemas/tuning_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "$id": "tuning_schema.json", 4 | "definitions": { 5 | "plugin_item": { 6 | "type": "object", 7 | "properties": { 8 | "fqcn": { "type": "string" }, 9 | "kwargs": { "type": "object" } 10 | }, 11 | "required": [ "fqcn" ] 12 | } 13 | }, 14 | "type": "object", 15 | "description": "A configuration for tuning hyperparameters in a Juneberry model.", 16 | "properties": { 17 | "description": { "type": "string" }, 18 | "format_version": { "type": "string" }, 19 | "num_samples": { "type": "integer" }, 20 | "scheduler": { "$ref": "#/definitions/plugin_item" }, 21 | "search_algorithm": { "$ref": "#/definitions/plugin_item" }, 22 | "search_space": { 23 | "type": "array", 24 | "items": { 25 | "type": "object", 26 | "properties": { 27 | "fqcn": { "type": "string" }, 28 | "hyperparameter_name": { "type": "string" }, 29 | "kwargs": { "type": "object" } 30 | }, 31 | "required" : [ "hyperparameter_name", "fqcn" ] 32 | } 33 | }, 34 | "timestamp": { "type": "string" }, 35 | "trial_resources": { 36 | "type": "object", 37 | "properties": { 38 | "cpu": { "type": "integer" }, 39 | "gpu": { "type": "integer" } 40 | } 41 | }, 42 | "tuning_parameters": { 43 | "type": "object", 44 | "properties": { 45 | "checkpoint_interval": { "type": "integer" }, 46 | "metric": { "type": "string" }, 47 | "mode": { "type": "string", "enum": [ "min", "max" ] }, 48 | "scope": { "type": "string", "enum": [ "all", "last", "avg", "last-5-avg", "last-10-avg" ] } 49 | } 50 | } 51 | }, 52 | "required": [ "search_space" ] 53 | } 
-------------------------------------------------------------------------------- /juneberry/metrics/classification/metrics.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - General Release 5 | # 6 | # Copyright 2021 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software subject to its own license. 20 | # 21 | # DM21-0884 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | This module provides a common classification metric plugin init to be used with Juneberry. 
27 | """ 28 | import logging 29 | from typing import Dict 30 | 31 | logger = logging.getLogger(__name__) 32 | 33 | 34 | class MetricsBase: 35 | 36 | def __init__(self, 37 | fqn: str, 38 | name: str, 39 | kwargs: Dict = None) -> None: 40 | self.fqn = fqn 41 | self.name = name 42 | self.kwargs = kwargs 43 | 44 | if not name or name == "": 45 | log_msg = f"Unable to init metrics: fqn={self.fqn}, kwargs={self.kwargs}. Missing 'name' parameter." 46 | logger.error(log_msg) 47 | raise ValueError(log_msg) 48 | -------------------------------------------------------------------------------- /juneberry/architectures/pytorch/torchvision.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import importlib 26 | import logging 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | 31 | class Wrapper: 32 | """ 33 | Basic wrapper for torchvision models classes 34 | """ 35 | 36 | def __call__(self, className, classArgs, num_classes): 37 | logger.warning("This class is deprecated! Please use:") 38 | logger.warning(" juneberry.pytorch.torchvision.Wrapper.") 39 | logger.warning("NOTE! The kwargs names change as well:") 40 | logger.warning(" className -> class_name") 41 | logger.warning(" classArgs -> class_args") 42 | mod = importlib.import_module('torchvision.models') 43 | my_class = getattr(mod, className) 44 | return my_class(**classArgs) 45 | -------------------------------------------------------------------------------- /juneberry/transforms/random_shift.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 
15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | Transformer to random shift an image. It requires a configuration with two arguments of the maximum number 27 | of pixels to shift or mirror the image. 28 | 29 | "config": { "max_width": 0.0, "max_height": 0.0 } 30 | 31 | """ 32 | 33 | import juneberry.image as jb_img_utils 34 | 35 | 36 | class RandomShift: 37 | def __init__(self, max_width, max_height): 38 | self.max_width = max_width 39 | self.max_height = max_height 40 | 41 | def __call__(self, image): 42 | """ 43 | Transformation function that is provided a PIL image. 44 | :param image: The source PIL image. 45 | :return: The transformed PIL image. 46 | """ 47 | return jb_img_utils.random_shift_image(image, self.max_width, self.max_height) 48 | -------------------------------------------------------------------------------- /juneberry/detectron2/utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. 
CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | from juneberry.pytorch.utils import PyTorchPlatformDefinitions 26 | 27 | 28 | class DT2PlatformDefinitions(PyTorchPlatformDefinitions): 29 | def get_config_suffix(self) -> str: 30 | """ 31 | Before training we emit the fully realized configuration file used by the platform. Different backend platforms 32 | use different file types, and while we name them all "platform_config", they need to have the correct 33 | suffix and format. This routine returns the suffix used by the platform, such as ".json" or ".yaml." The 34 | default format is ".json" 35 | :return: The suffix used when saving realized platform_config file before training. 
36 | """ 37 | return ".yaml" 38 | 39 | def has_platform_config(self) -> bool: 40 | return True 41 | -------------------------------------------------------------------------------- /juneberry/schemas/experiment_outline_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "type": "object", 4 | "properties": { 5 | "baseline_config": { "type": "string" }, 6 | "description": { "type": "string" }, 7 | "filters": { "type": "array" }, 8 | "format_version": { "type": "string" }, 9 | "model": { 10 | "type": "object", 11 | "properties": { 12 | "filters": { "type": "array" }, 13 | "maximum_evaluations": { "type": "integer" } 14 | } 15 | }, 16 | "reports": { 17 | "type": "array", 18 | "items": { 19 | "allOf": [{ "$ref": "report_schema.json#/$defs/report"}], 20 | "properties": { 21 | "classes": { "type": "string"}, 22 | "test_tag": { "type": "string" } 23 | } 24 | } 25 | }, 26 | "tests": { 27 | "type": "array", 28 | "items": { 29 | "type": "object", 30 | "properties": { 31 | "tag": { "type": "string" }, 32 | "dataset_path": { "type": "string" }, 33 | "classify": { "type": "integer" } 34 | }, 35 | "required": [ "classify", "dataset_path", "tag" ] 36 | } 37 | }, 38 | "timestamp": { 39 | "type": "string" 40 | }, 41 | "variables": { 42 | "type": "array", 43 | "items": { 44 | "type": "object", 45 | "properties": { 46 | "nickname": { "type": "string" }, 47 | "config_field": { "type": "string" }, 48 | "vals": { 49 | "anyOf": [ 50 | { "type": "array" }, 51 | { "type": "string" } 52 | ] 53 | } 54 | }, 55 | "required": [ "config_field", "nickname", "vals" ] 56 | } 57 | } 58 | }, 59 | "required": [ "baseline_config", "reports", "tests", "variables" ] 60 | } 61 | -------------------------------------------------------------------------------- /scripts/setup_lab.py: -------------------------------------------------------------------------------- 1 | #!
/usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import argparse 26 | from pathlib import Path 27 | 28 | 29 | def create_missing_dir(d: Path) -> None: 30 | if not d.exists(): 31 | print(f"Creating project dir \"{d}\".") 32 | d.mkdir(parents=True, exist_ok=True) 33 | 34 | 35 | def create_missing_project_dirs(project_dir: str) -> None: 36 | project_subdirs = [ 37 | "cache", 38 | "dataroot", 39 | "tensorboard", 40 | ] 41 | create_missing_dir(Path(project_dir)) 42 | for subdir in project_subdirs: 43 | create_missing_dir(Path(project_dir, subdir)) 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser() 48 | parser.add_argument("project_dir", help="Directory containing the Juneberry project.") 49 | args = parser.parse_args() 50 | create_missing_project_dirs(args.project_dir) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | 56 | -------------------------------------------------------------------------------- /juneberry/schemas/rules_list_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "type": "object", 4 | "properties": { 5 | "description": { "type": "string" }, 6 | "format_version": { "type": "string" }, 7 | "timestamp": { "type": "string" }, 8 | "workflows": { 9 | "type": "array", 10 | "items": { 11 | "type": "object", 12 | "properties": { 13 | "name": { "type": "string" }, 14 | "rules": { 15 | "type": "array", 16 | "items": { 17 | "type": "object", 18 | "properties": { 19 | "clean_extras": { 20 | "type": "array", 21 | "items": { "type": "string" } 22 | }, 23 | "command": { 24 | "type": "array", 25 | "items": { "type": "string" } 26 | }, 27 | "doc": { "type": "string" }, 28 | "id": { "type": "integer" }, 29 | "inputs": { 30 | "type": "array", 31 | "items": { "type": "string" } 32 | }, 33 | "outputs": { 34 | "type": 
"array", 35 | "items": { "type": "string" } 36 | }, 37 | "requirements": { 38 | "type": "array", 39 | "items": { "type": "integer" } 40 | } 41 | }, 42 | "required": [ "command", "doc", "id", "inputs" ] 43 | } 44 | } 45 | }, 46 | "required": [ "name", "rules" ] 47 | } 48 | } 49 | }, 50 | "required": [ "workflows" ] 51 | } 52 | -------------------------------------------------------------------------------- /juneberry/platform.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | class PlatformDefinitions: 26 | def get_model_filename(self) -> str: 27 | """ :return: The name of the model file that the trainer saves and what evaluators should load""" 28 | pass 29 | 30 | def get_config_suffix(self) -> str: 31 | """ 32 | Before training we emit the fully realized configuration file used by the platform. Different backend platforms 33 | use different file types and while Juneberry names them all "platform_config", they need to have the correct 34 | suffix and format. This routine returns the suffix used by the platform, such as ".json" or ".yaml." The 35 | default format is ".json" 36 | :return: The suffix used when saving the realized platform_config file before training. 37 | """ 38 | return ".json" 39 | 40 | def has_platform_config(self) -> bool: 41 | # TODO: This is somewhat of a hack 42 | return True 43 | -------------------------------------------------------------------------------- /test/test_logging.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 
13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import logging 26 | from pathlib import Path 27 | import tempfile 28 | 29 | import juneberry.logging as jb_logging 30 | 31 | 32 | def test_filter_repeated_messages(): 33 | 34 | with tempfile.TemporaryDirectory() as tmpdir: 35 | tmplog = Path(tmpdir, "test_jb_logging.out") 36 | jb_logging.setup_logger(tmplog, "", name="test_jb_logging", level=logging.DEBUG, 37 | log_filter_class=jb_logging.RemoveDuplicatesFilter) 38 | 39 | logger = logging.getLogger("test_jb_logging") 40 | logger.info("Repeated message.") 41 | logger.info("Repeated message.") 42 | logger.info("Repeated message.") 43 | 44 | # We logged three messages, but because the messages were duplicates, 45 | # only one message should have been logged. 
46 | with open(tmplog, 'r') as f: 47 | num_lines = len(f.readlines()) 48 | assert num_lines == 1 49 | -------------------------------------------------------------------------------- /juneberry/schemas/property_inference_attack_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "definitions": { 4 | "plugin": { 5 | "type": "object", 6 | "properties": { 7 | "fqcn": { "type": "string" }, 8 | "kwargs": { "type": "object" } 9 | }, 10 | "required": [ "fqcn" ] 11 | } 12 | }, 13 | "type": "object", 14 | "description": "A configuration for building an attack experiment using basic Juneberry commands.", 15 | "properties": { 16 | "data_configs": { 17 | "type": "object", 18 | "properties": { 19 | "in_out_builder": { "$ref": "#/definitions/plugin" }, 20 | "query_data": { "type": "string" }, 21 | "training_data": { "type": "string" } 22 | }, 23 | "required": ["in_out_builder", "query_data", "training_data"] 24 | }, 25 | "models": { 26 | "type": "object", 27 | "properties": { 28 | "meta": { "type": "string" }, 29 | "private": { "type": "string" }, 30 | "shadow": { "type": "string" }, 31 | "shadow_disjoint_quantity": { "type": "integer" }, 32 | "shadow_superset_quantity": { "type": "integer" } 33 | }, 34 | "required": ["meta", "private", "shadow", "shadow_disjoint_quantity", "shadow_superset_quantity"] 35 | }, 36 | "watermarks": { 37 | "type": "object", 38 | "properties": { 39 | "disjoint_args": { "type": "array", "items": { "type": "object" } }, 40 | "private_disjoint_args": { "type": "object" }, 41 | "private_superset_args": { "type": "object" }, 42 | "query_watermarks": { "$ref": "#/definitions/plugin" }, 43 | "superset_args": { "type": "array", "items": { "type": "object" } }, 44 | "training_watermarks": { "$ref": "#/definitions/plugin" } 45 | }, 46 | "required": ["disjoint_args", "private_disjoint_args", "private_superset_args", "query_watermarks", 47 | 
"superset_args", "training_watermarks"] 48 | } 49 | }, 50 | "required": ["data_configs", "models", "watermarks"] 51 | } -------------------------------------------------------------------------------- /juneberry/transforms/random_crop_mirror.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | Simple transformer to shift an image that may have been mirrored. 
The JSON configuration requires three arguments: 27 | amount of horizontal shift allowed (in pixels), amount of vertical shift allowed (in pixels) and a boolean to control 28 | whether or not the image should be mirrored. 29 | 30 | "config": { "width_pixels": 0, "height_pixels": 0, "mirror": 0 } 31 | 32 | """ 33 | 34 | import juneberry.image as jb_img_utils 35 | 36 | 37 | class RandomCropMirror: 38 | def __init__(self, width_pixels, height_pixels, mirror): 39 | self.width_pixels = width_pixels 40 | self.height_pixels = height_pixels 41 | self.mirror = mirror 42 | 43 | def __call__(self, image): 44 | """ 45 | Transformation function that is provided a PIL image. 46 | :param image: The source PIL image. 47 | :return: The transformed PIL image. 48 | """ 49 | return jb_img_utils.random_crop_mirror_image(image, self.mirror, self.width_pixels, self.height_pixels) 50 | -------------------------------------------------------------------------------- /juneberry/metrics/objectdetection/brambox/utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 
13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import json 26 | from typing import Dict, Tuple 27 | 28 | import brambox as bb 29 | from pandas import DataFrame 30 | 31 | from juneberry.config import coco_utils 32 | 33 | 34 | def get_df(anno: Dict, det: Dict) -> Tuple[DataFrame, DataFrame]: 35 | """ 36 | Create brambox-compatible DataFrames to be used in Metrics calls. 37 | :param anno: a dict of annotations in COCO format 38 | :param det: a dict of detections in COCO format 39 | :return: the annotations and detections DataFrames 40 | """ 41 | # TODO don't need to make an anno_parser every time, it doesn't depend on anno or det 42 | anno_parser = bb.io.parser.annotation.CocoParser(parse_image_names=False) 43 | anno_parser.deserialize(json.dumps(anno)) 44 | anno_df = anno_parser.get_df() 45 | 46 | det_parser = bb.io.parser.detection.CocoParser(class_label_map=coco_utils.get_class_label_map(anno)) 47 | det_parser.deserialize(json.dumps(det)) 48 | det_df = det_parser.get_df() 49 | 50 | return anno_df, det_df 51 | -------------------------------------------------------------------------------- /test/config/test_training_output.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | from pathlib import Path 26 | 27 | from juneberry.config.training_output import TrainingOutputBuilder 28 | from juneberry.config.model import ModelConfig 29 | import utils 30 | 31 | 32 | def test_builder(tmp_path): 33 | builder = TrainingOutputBuilder() 34 | 35 | mc = ModelConfig.from_dict(utils.make_basic_model_config()) 36 | 37 | builder.set_from_model_config("test_config", mc) 38 | 39 | # The schema should require these things 40 | builder.output.options.num_training_images = 0 41 | builder.output.options.num_validation_images = 0 42 | builder.output.options.validation_dataset_config_path = "" 43 | builder.output.options.training_dataset_config_path = "" 44 | 45 | builder.output.results.accuracy = [0.0] 46 | builder.output.results.loss = [0.0] 47 | builder.output.results.model_hash = "" 48 | builder.output.results.val_accuracy = [0.0] 49 | builder.output.results.val_loss = [0.0] 50 | 51 | out_path = Path(tmp_path) / "tmp.json" 52 | builder.save(str(out_path)) 53 | 54 | -------------------------------------------------------------------------------- /juneberry/schemas/tuning_output_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "type": "object", 4 | "properties": { 5 | "format_version": { "type": "string" }, 6 | "options": { 7 | "type": "object", 8 | "properties": { 9 | "model_name": { "type": "string" }, 10 | "tuning_config": { "type": "string" } 11 | }, 12 | "required": [ "model_name", "tuning_config" ] 13 | }, 14 | "results": { 15 | "type": "object", 16 | "properties": { 17 | "best_trial_id": { "type": "string" }, 18 | "best_trial_params": { "type": "object" }, 19 | "trial_results": { 20 | "type": "array", 21 | "items": { 22 | "type": "object", 23 | "properties": { 24 | "directory": 
{ 25 | "type": "string" 26 | }, 27 | "id": { 28 | "type": "string" 29 | }, 30 | "num_iterations": { 31 | "type": "integer" 32 | }, 33 | "params": { 34 | "type": "object" 35 | }, 36 | "result_data": { 37 | "type": "object" 38 | } 39 | }, 40 | "required": [ 41 | "directory", 42 | "id", 43 | "num_iterations", 44 | "params", 45 | "result_data" 46 | ] 47 | } 48 | } 49 | }, 50 | "required": [ "best_trial_id", "trial_results" ] 51 | }, 52 | "times": { 53 | "type": "object", 54 | "properties": { 55 | "duration": { "type": "number" }, 56 | "end_time": { "type": "string" }, 57 | "start_time": { "type": "string" } 58 | }, 59 | "required": [ ] 60 | } 61 | }, 62 | "required": [ "options", "results" ] 63 | } 64 | -------------------------------------------------------------------------------- /scripts/dry_run_all_models.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. 
Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | Trivial script to call "dry run" on every model directory that has a config file. 27 | """ 28 | 29 | import argparse 30 | import os 31 | from pathlib import Path 32 | import subprocess 33 | import sys 34 | 35 | 36 | def main(): 37 | parser = argparse.ArgumentParser(description="Script to call '--dryrun' on every model directory in " 38 | "the specified workspace.") 39 | parser.add_argument("workspace", help="Workspace root (above models).") 40 | args = parser.parse_args() 41 | 42 | workspace = Path(".") / args.workspace 43 | os.chdir(workspace) 44 | 45 | for config_path in workspace.glob("models/**/config.json"): 46 | model_name = "/".join(config_path.parts[1:-1]) 47 | print(f"******** DRY RUN on {model_name}") 48 | result = subprocess.run(['jb_train', '--dryrun', '-w', str(workspace), model_name]) 49 | 50 | if result.returncode != 0: 51 | print(f"Failed to do dry run '{result.returncode}' on {model_name}. EXITING!!") 52 | sys.exit(-1) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | README 4 | ========== 5 | 6 | # Introduction 7 | 8 | Juneberry improves the experience of machine learning experimentation by providing a framework for automating 9 | the training, evaluation, and comparison of multiple models against multiple datasets, thereby reducing errors and 10 | improving reproducibility. 11 | 12 | This README describes how to use the Juneberry framework to execute machine learning tasks. 
Juneberry follows a (mostly) 13 | declarative programming model composed of sets of config files (dataset, model, and experiment configurations) and 14 | Python plugins for features such as model construction and transformation. 15 | 16 | If you're looking for a slightly more in-depth description of Juneberry, see [What Is Juneberry](docs/whatis.md). 17 | 18 | Other resources can be found at the [Juneberry Home Page](https://www.sei.cmu.edu/our-work/projects/display.cfm?customel_datapageid_4050=334902). 19 | 20 | # Supporting Documentation 21 | 22 | ## How to Install Juneberry 23 | 24 | The [Getting Started](docs/getting_started.md) documentation explains how to install Juneberry. It also 25 | includes a simple test command you can use to verify the installation. 26 | 27 | ## Experiment Overview 28 | 29 | The [Workspace and Experiment Overview](docs/overview.md) documentation contains information about 30 | the structure of the Juneberry workspace and how to organize experiments. 31 | 32 | ## Experiment Tutorial 33 | 34 | The [Juneberry Basic Tutorial](docs/tutorial.md) describes how to create a model, train the model, 35 | and run an experiment. 36 | 37 | ## Configuring Juneberry 38 | 39 | The [Juneberry Configuration Guide](docs/configuring.md) describes various ways to configure Juneberry. 40 | 41 | ## Known Warnings 42 | 43 | During normal use of Juneberry, you may encounter warning messages. The 44 | [Known Warnings in Juneberry](docs/known_warnings.md) documentation contains information about known warning 45 | messages and what (if anything) should be done about them. 46 | 47 | ## Further Reading 48 | 49 | The [vignettes](docs/vignettes) directory contains detailed walkthroughs of various Juneberry tasks. 50 | The vignettes provide helpful examples of how to construct various Juneberry configuration files, 51 | including datasets, models, and experiments. 
A good start is 52 | [Replicating a Classic Machine Learning Result with Juneberry](docs/vignettes/vignette1/Replicating_a_Classic_Machine_Learning_Result_with_Juneberry.md). 53 | 54 | # Copyright 55 | 56 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms. 57 | -------------------------------------------------------------------------------- /juneberry/metrics/classification/sklearn/metrics.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | This module provides a sklearn classification metric plugin to be used with Juneberry. 27 | """ 28 | import logging 29 | from typing import Dict 30 | 31 | from juneberry.evaluation import utils as jb_eval_utils 32 | from juneberry.loader import load_verify_fqn_function 33 | from juneberry.metrics.classification.metrics import MetricsBase 34 | 35 | logger = logging.getLogger(__name__) 36 | 37 | 38 | class Metrics(MetricsBase): 39 | 40 | def __init__(self, 41 | fqn: str, 42 | name: str, 43 | kwargs: Dict = None) -> None: 44 | super().__init__(fqn, name, kwargs) 45 | 46 | def __call__(self, target, preds, binary=False): 47 | singular_preds = jb_eval_utils.continuous_predictions_to_class(preds, binary) 48 | metrics_function = load_verify_fqn_function(self.fqn, {**{"y_true": [], "y_pred": []}, **self.kwargs}) 49 | 50 | if not metrics_function: 51 | log_msg = f"Unable to create metrics function: fqn={self.fqn}, name={self.name}, kwargs={self.kwargs}." 52 | logger.error(log_msg) 53 | raise ValueError(log_msg) 54 | 55 | return metrics_function(target, singular_preds, **self.kwargs) 56 | -------------------------------------------------------------------------------- /scripts/predictions_to_coco.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. 
CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import argparse 26 | from pathlib import Path 27 | 28 | import juneberry.config.coco_utils as coco_utils 29 | import juneberry.scripting.utils as jb_scripting 30 | 31 | 32 | def setup_args(parser) -> None: 33 | """ 34 | Adds arguments to the parser 35 | :param parser: The parser in which to add arguments. 36 | """ 37 | parser.add_argument('dataset', help='Data set used to drive predictions.') 38 | parser.add_argument('predictions', help='Path to predictions file within workspace to convert.') 39 | parser.add_argument('output', help='Path to file for coco output.') 40 | 41 | 42 | def main(): 43 | # Setup and parse all arguments. 
44 | parser = argparse.ArgumentParser(description="Converts predictions output to coco format.") 45 | setup_args(parser) 46 | jb_scripting.setup_args(parser) 47 | args = parser.parse_args() 48 | 49 | # TODO: Updated jb_scripting to be more clear 50 | lab = jb_scripting.setup_workspace(args, log_file=None) 51 | 52 | coco_utils.save_predictions_as_anno(data_root=lab.data_root(), dataset_config=args.dataset, 53 | predict_file=args.predictions, output_file=Path(args.output)) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /scripts/merge_predictions.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | import argparse 26 | import csv 27 | 28 | import juneberry.filesystem as jb_fs 29 | 30 | 31 | def add_predictions(filepath, id, csvwriter): 32 | data = jb_fs.load_file(filepath) 33 | 34 | labels = data['testResults']['labels'] 35 | for i, preds in enumerate(data['testResults']['predictions']): 36 | row = [id, labels[i]] + preds 37 | csvwriter.writerow(row) 38 | 39 | 40 | def setup_args(parser) -> None: 41 | """ 42 | Adds arguments to the parser 43 | :param parser: The parser in which to add arguments. 44 | """ 45 | parser.add_argument('predictions0', help='First predictions file.') 46 | parser.add_argument('predictions1', help='Second predictions file.') 47 | parser.add_argument('output', help='Path to file for coco output.') 48 | 49 | 50 | def main(): 51 | # Setup and parse all arguments. 52 | parser = argparse.ArgumentParser(description="Merges and converts two predictions files to a single csv output.") 53 | setup_args(parser) 54 | args = parser.parse_args() 55 | 56 | with open(args.output, "w") as csv_file: 57 | csv_writer = csv.writer(csv_file) 58 | add_predictions(args.predictions0, 0, csv_writer) 59 | add_predictions(args.predictions1, 1, csv_writer) 60 | 61 | if __name__ == "__main__": 62 | main() -------------------------------------------------------------------------------- /juneberry/metrics/classification/torchnn/metrics.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. 
class Metrics(MetricsBase):
    """
    Classification metrics plugin that constructs the callable identified by the plugin's fqn
    and applies it to torch tensors built from the supplied targets and predictions.
    """

    def __init__(self, fqn: str, name: str, kwargs: Dict = None) -> None:
        """
        Forwards the plugin description to the base class.
        :param fqn: Fully qualified name of the metrics callable to construct.
        :param name: The name associated with this metric.
        :param kwargs: Keyword arguments for the metrics callable.
        """
        super().__init__(fqn, name, kwargs)

    def __call__(self, target, preds, binary=False):
        """
        Computes the metric for the given targets and predictions.
        :param target: The target values; converted to a torch.LongTensor.
        :param preds: The predicted values; converted to a torch.FloatTensor.
        :param binary: When True, the metrics callable is wrapped with function_wrapper_unsqueeze_1 first.
        :return: The metric result converted with .numpy().
        :raises ValueError: If the metrics callable could not be constructed.
        """
        target = torch.LongTensor(target)
        preds = torch.FloatTensor(preds)

        metric_fn = construct_instance(self.fqn, self.kwargs)
        if not metric_fn:
            message = f"Unable to create metrics function: fqn={self.fqn}, name={self.name}, kwargs={self.kwargs}."
            logger.error(message)
            raise ValueError(message)

        if binary:
            metric_fn = pyt_utils.function_wrapper_unsqueeze_1(metric_fn)

        # Note the argument order: predictions first, then targets.
        return metric_fn(preds, target, **self.kwargs).numpy()
@dataclass
class TuningSprout(Sprout):
    """
    A Sprout that additionally captures the script arguments related to model tuning.
    """
    # ========== SCRIPT ARGS ==========
    # Execution mode.
    dryrun: bool = None

    # Model selection.
    model_name: str = None

    # Tuning configuration.
    tuning_config: str = None

    def grow_from_args(self, args: Namespace) -> None:
        """
        Copies the values found in a Namespace of arguments onto this TuningSprout.
        :param args: A Namespace of arguments, typically created by passing arguments to a Juneberry script.
        :return: Nothing.
        """
        # The base Sprout takes care of the attributes it declares.
        super().grow_from_args(args)

        # Each entry is (sprout attribute, argument name, value used when the argument is absent).
        tuning_fields = (
            ("dryrun", "dryrun", False),
            ("model_name", "modelName", None),
            ("tuning_config", "tuningConfig", None),
        )
        for attribute, arg_name, fallback in tuning_fields:
            setattr(self, attribute, getattr(args, arg_name, fallback))
CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | # ================================================================================================= 26 | # WARNING: These images and scripts create containers with NO SECURITY PRACTICES, such as 27 | # separate user accounts, unprivileged users, etc. 28 | # 29 | # USE AT YOUR OWN RISK 30 | # ================================================================================================= 31 | 32 | # This script provides a starting point for creating your own container launcher. If your layout 33 | # follows the basic Juneberry lab layout, then this script should basically work as-is. 34 | # 35 | # Run this script from inside your workspace of choice. 
# The current directory is the workspace; the lab is its parent and holds the
# juneberry, dataroot, tensorboard and cache directories mounted below.
WS="${PWD}"
LAB="$(dirname "$WS")"
CACHE="${LAB}/cache"

# Every expansion is quoted so that paths or user names containing spaces
# (or glob characters) are passed to docker as single arguments.
docker run -it --rm --network=host --ipc=host --name "${USER}" \
    --env HTTP_PROXY --env http_proxy --env HTTPS_PROXY --env https_proxy --env NO_PROXY --env no_proxy \
    -e USER_NAME="${USER}" -e USER_ID="$(id -u "${USER}")" -e USER_GID="$(id -g "${USER}")" -e HOST_UNAME="$(uname)" \
    -v "${WS}":/workspace -w /workspace \
    -v "${LAB}/juneberry":/juneberry \
    -v "${LAB}/dataroot":/dataroot:ro \
    -v "${LAB}/tensorboard":/tensorboard \
    -v "${CACHE}/hub":/root/.cache/torch/hub \
    -v "${CACHE}/torch":/root/.torch \
    -v "${CACHE}/tensorflow":/root/tensorflow_datasets \
    cmusei/juneberry:cpudev \
    bash
class DummyLoss:
    """
    Stand-in loss used by test_make_loss. Construction marks the model; calling adds the two inputs.
    """

    def __init__(self, model):
        # Leave a marker on the model so the test can tell that the model reached the constructor.
        model['return'] = 'World'

    def __call__(self, predicted, target):
        return predicted + target


def test_make_loss():
    """
    make_loss should construct the configured loss with the model and return it as a callable.
    """
    model = {'input': 'Hello'}

    # NOTE: This is run with the current directory (not the root test directory) in the python path
    config = PytorchOptions.from_dict({'loss_fn': 'pytorch.test_utils.DummyLoss'})

    loss = pyt_utils.make_loss(config, model, False)
    assert model['return'] == 'World'
    assert loss(2, 3) == 5


class DummyLR:
    """
    Stand-in learning rate scheduler that simply records its constructor arguments.
    """

    def __init__(self, optimizer, epochs, foo):
        self.optimizer = optimizer
        self.epochs = epochs
        self.foo = foo


def test_make_lr_schedule():
    """
    make_lr_scheduler should pass the optimizer through and let the supplied epochs win over the config.
    """
    schedule_args = {"epochs": 25, "foo": "bar"}
    lr_options = PytorchOptions.from_dict({
        "lr_schedule_args": schedule_args,
        "lr_schedule_fn": "pytorch.test_utils.DummyLR"
    })

    # These epochs should override
    lr_scheduler = pyt_utils.make_lr_scheduler(lr_options, "hello", 10)
    assert lr_scheduler.optimizer == "hello"
    assert lr_scheduler.epochs == 10
    assert lr_scheduler.foo == "bar"
class DT2NoOp:
    """
    Example of a (no-operation) transformer which demonstrates ALL available extension points when
    building your own DT2 Transform class. Every method hands its argument back untouched.
    """

    def apply_image(self, img: np.ndarray) -> np.ndarray:
        """ :return: The image, unchanged. """
        return img

    def apply_box(self, box: np.ndarray) -> np.ndarray:
        """ :return: The box, unchanged. """
        return box

    def apply_coords(self, coords: np.ndarray) -> np.ndarray:
        """ :return: The coordinates, unchanged. """
        return coords

    def apply_polygons(self, polygons: list) -> list:
        """ :return: The polygons, unchanged. """
        return polygons

    def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray:
        """ :return: The segmentation, unchanged. """
        return segmentation


class DT2Logger:
    """
    Transformer that logs the type of each value it receives and returns the value untouched.
    """

    def __init__(self, msg="None"):
        """
        :param msg: Text included in every log line emitted by this transformer.
        """
        self.msg = msg

    def apply_coords(self, coords: np.ndarray):
        """ Logs the type of the coordinates and returns them unchanged. """
        logger.info(f"apply_coords: msg={self.msg}, coords-type={type(coords)}")
        return coords

    def apply_polygons(self, polygons: list) -> list:
        """ Logs the type of the polygons and returns them unchanged. """
        logger.info(f"apply_polygons: msg={self.msg}, polygons-type={type(polygons)}")
        return polygons

    def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray:
        """ Logs the type of the segmentation and returns it unchanged. """
        logger.info(f"apply_segmentation: msg={self.msg}, segmentation-type={type(segmentation)}")
        return segmentation
def make_raw_images():
    """
    Builds ten 10x10 RGB images, each filled with a single gray level from 0% to 90% in steps of 10%.
    :return: The list of images.
    """
    return [
        Image.new('RGB', (10, 10), f"rgb({percent}%, {percent}%, {percent}%)")
        for percent in range(0, 100, 10)
    ]


def test_compute_elementwise_mean() -> None:
    """
    We do one simple 2x2 test to make sure we get basic results.
    """
    a = np.array(range(1, 5), dtype='uint8')
    b = np.array(range(10, 50, 10), dtype='uint8')

    # The expected mean of each pair of elements, truncated to an int.
    raw_correct = [int((a[i] + b[i]) / 2) for i in range(4)]

    a = a.reshape((2, 2))
    b = b.reshape((2, 2))
    correct = np.array(raw_correct).reshape((2, 2))

    results = jb_image.compute_elementwise_mean(np.array([a, b]))

    for i, j in np.ndindex(2, 2):
        assert correct[i][j] == results[i][j]


def test_channel_means() -> None:
    """
    The gray test images have identical channels, so every channel mean should be the same value.
    """
    images = [np.array(image) for image in make_raw_images()]
    results = jb_image.compute_channel_means(images)
    for channel in range(3):
        assert results[channel] == 0.45098039215686275
The usual pattern is to have the main() 22 | routine do all the argument parsing, perform validity checks, open files, and read configs, then call the business functions. 23 | So, something like: 24 | 25 | ``` 26 | #! /usr/bin/env python3 27 | 28 | import argparse 29 | import logging 30 | 31 | 32 | # Use some better name than business logic... 33 | def hello_world(data_root): 34 | print(f"Hello world from {data_root}") 35 | 36 | 37 | def main(): 38 | logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') 39 | 40 | parser = argparse.ArgumentParser(description="Pithy comment here.") 41 | parser.add_argument('dataRoot', help='Root of data directory') 42 | 43 | args = parser.parse_args() 44 | 45 | hello_world(args.dataRoot) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | ``` 51 | 52 | ## JSON 53 | - snake_case names for properties 54 | - 4 space indent 55 | 56 | ## Git 57 | - We use standard gitflow style 58 | - We default to squash on merges 59 | - When branches get confusing, prefer to rebase to a new branch with the suffix "-merge" 60 | - Most tasks are features 61 | 62 | # Design Notes 63 | 64 | ## Config files 65 | Juneberry is a config-driven system. Different types of config files store different concerns; for example, the structure 66 | of a model would be in one type of config, while the contents of a dataset would be defined in another. Regardless of 67 | config type, a similar process is used to load config data, so all configs follow a similar philosophy. 68 | 69 | 1) Files or blobs are in common python-happy formats such as json, yaml or toml 70 | 1) Files are loaded into pure python data structures 71 | 1) Version conversions are performed 72 | 1) Data structures are validated using jsonschema regardless of file format 73 | 1) Validated structures are loaded into convenience data models such as Prodict. 74 | 75 | # Copyright 76 | 77 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms.
78 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
def setup_data():
    """
    Builds the fixtures shared by the key conversion tests.
    :return: A tuple of (mixed-case input data, the expected snake_case data, the old-key to new-key map).
    """
    # Only keys that actually change appear in the map; 'nested' and 'okay' stay as they are.
    key_map = {
        'someKey': 'some_key',
        'otherKey': 'other_key',
        'arrayKey': 'array_key',
        'dictKey': 'dict_key',
        'subKeyA': 'sub_key_a',
        'subKeyB': 'sub_key_b',
    }

    test_data = {
        "someKey": 1,
        "otherKey": 2,
        'nested': {
            'arrayKey': [1, 2, 3],
            'dictKey': {'subKeyA': 'Frodo', 'subKeyB': 'Sam', 'okay': 'Merry'}
        }
    }

    expected_data = {
        "some_key": 1,
        "other_key": 2,
        'nested': {
            'array_key': [1, 2, 3],
            'dict_key': {'sub_key_a': 'Frodo', 'sub_key_b': 'Sam', 'okay': 'Merry'}
        }
    }

    return test_data, expected_data, key_map


def test_rekey():
    """
    rekey should rename the keys of the structure in place according to a known key map.
    """
    original, expected, known_map = setup_data()

    # Convert based on a known key_map
    jb_utils.rekey(original, known_map)
    assert expected == original


def test_snake_case():
    """
    mixed_to_snake_struct_keys should convert the keys in place and return the map of the keys it changed.
    """
    original, expected, known_map = setup_data()

    # Convert based on the algo
    produced_map = jb_utils.mixed_to_snake_struct_keys(original)
    assert expected == original
    assert known_map == produced_map
CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | The vast majority of transformers just wrap specific functional calls such 27 | as calls in the image support. We expect those tests to cover that part of the 28 | functionality. 29 | 30 | These tests are to make sure that the transforms have the proper signature 31 | and can be loaded by the transform_manager. Thus, in most cases all we need 32 | to do is to pass the configuration into the transform manager. 
def _assert_single_transform_loads(fqcn, kwargs):
    """
    Hands a one-entry transform configuration to the TransformManager and checks that one transform is held.
    :param fqcn: Fully qualified class name of the transform to load.
    :param kwargs: Keyword arguments for constructing the transform.
    """
    config = [{'fqcn': fqcn, 'kwargs': kwargs}]
    jtm = juneberry.transforms.transform_manager.TransformManager(config)
    assert len(jtm) == 1


def test_load_random_crop():
    """ The RandomCropMirror transform can be loaded by the transform manager. """
    _assert_single_transform_loads(
        'juneberry.transforms.random_crop_mirror.RandomCropMirror',
        {"width_pixels": 0, "height_pixels": 0, "mirror": 0})


def test_load_mirror_flip():
    """ The RandomMirrorFlip transform can be loaded by the transform manager. """
    _assert_single_transform_loads(
        'juneberry.transforms.random_mirror_flip.RandomMirrorFlip',
        {"mirror_chance": 0.0, "flip_chance": 0.0})


def test_load_random_shift():
    """ The RandomShift transform can be loaded by the transform manager. """
    _assert_single_transform_loads(
        'juneberry.transforms.random_shift.RandomShift',
        {"max_width": 0.0, "max_height": 0.0})
@dataclass
class Sprout:
    """
    Holds the arguments that are passed into Juneberry scripts. This base class covers the
    arguments that are common to all scripts.
    """
    # ========== SCRIPT ARGS ==========
    # Directories.
    workspace_dir: str = None
    dataroot_dir: str = None
    tensorboard_dir: str = None
    log_dir: str = None

    # Logging.
    silent: bool = None
    log_level: int = None

    # Lab.
    profile_name: str = None

    def grow_from_args(self, args: Namespace) -> None:
        """
        Copies the values found in a Namespace of arguments onto this Sprout.
        :param args: A Namespace of arguments, typically created by passing arguments to a Juneberry script.
        :return: Nothing.
        """
        # Directory arguments are copied as-is and become None when the argument is absent.
        directory_fields = (
            ("workspace_dir", "workspace"),
            ("dataroot_dir", "dataRoot"),
            ("tensorboard_dir", "tensorboard"),
            ("log_dir", "logDir"),
        )
        for attribute, arg_name in directory_fields:
            setattr(self, attribute, getattr(args, arg_name, None))

        self.silent = getattr(args, "silent", False)

        # A truthy 'verbose' argument selects DEBUG logging; anything else selects INFO.
        if getattr(args, "verbose", None):
            self.log_level = logging.DEBUG
        else:
            self.log_level = logging.INFO

        self.profile_name = getattr(args, "profileName", None)
def binary_function(a, b):
    """ :return: A string joining the two arguments with ' and '. """
    return f"{a} and {b}"


class MyClass:
    """ A class that only exposes a static, single-argument function. """

    @staticmethod
    def unary(a):
        """ :return: A string describing the argument. """
        return f"a is {a}"


class ClassWithInit:
    """ A callable that is constructed with a name and takes no call arguments. """

    def __init__(self, name):
        self.name = name

    def __call__(self):
        return f"{self.name}"

    def get_name(self):
        """ :return: The name supplied at construction. """
        return self.name


class ClassWithInitAndUnaryCall:
    """ A callable that is constructed with a name and takes one call argument. """

    def __init__(self, name):
        self.name = name

    def __call__(self, arg):
        return f"{self.name} {arg}"

    def get_name(self):
        """ :return: The name supplied at construction. """
        return self.name


class ClassWithUnaryCallWithOptArg1:
    """ A callable taking one argument plus an optional keyword named opt1. """

    def __init__(self):
        self.name = "No name"

    def __call__(self, arg, opt1=None):
        return f"{arg} {opt1}"

    def get_name(self):
        """ :return: The fixed name of this instance. """
        return self.name


class ClassWithUnaryCallWithOptArg2:
    """ A callable taking one argument plus an optional keyword named opt2. """

    def __init__(self):
        self.name = "No name"

    def __call__(self, arg, opt2=None):
        return f"{arg} {opt2}"

    def get_name(self):
        """ :return: The fixed name of this instance. """
        return self.name


class LabeledTransformExample:
    """ A callable that takes a keyword-only label and returns a (value, label) pair. """

    def __init__(self):
        self.name = "No name"

    def __call__(self, arg, *, label, opt1=None):
        # The label is converted to an int and incremented by one.
        text = f"{arg} {opt1}"
        next_label = int(label) + 1
        return text, next_label

    def get_name(self):
        """ :return: The fixed name of this instance. """
        return self.name


def transform_maker():
    """ :return: A function that adds its argument to itself. """
    def doubled(x):
        return x + x

    return doubled


def transform_maker_arg(y):
    """
    :param y: The value placed on the left side of the addition.
    :return: A function that returns y + x for its argument x.
    """
    def prefixed(x):
        return y + x

    return prefixed
/usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
#
# DM22-0856
#
# ======================================================================================================================

import contextlib
import hashlib
import io
import logging
import sys

import tensorflow as tf

from juneberry.platform import PlatformDefinitions
import juneberry.utils as jb_utils

logger = logging.getLogger(__name__)


class TensorFlowPlatformDefinitions(PlatformDefinitions):
    """Platform definitions used for TensorFlow models."""

    def get_model_filename(self) -> str:
        """ :return: The name of the model file that the trainer saves and what evaluators should load"""
        return "model.h5"


def save_summary(model, summary_file_path):
    """
    Write the text printed by model.summary() to a file.
    :param model: An object with a summary() method that prints to stdout.
    :param summary_file_path: Path of the file to (over)write with the summary, encoded as UTF-8.
    :return: Nothing.
    """
    # The context managers guarantee the file is closed and sys.stdout is restored even when
    # model.summary() raises. Previously the file handle leaked and stdout stayed redirected.
    with open(summary_file_path, 'w+', encoding="utf-8") as summary_file:
        with contextlib.redirect_stdout(summary_file):
            model.summary()


def hash_summary(model):
    """
    Compute a SHA-256 digest of the text printed by model.summary().
    :param model: An object with a summary() method that prints to stdout.
    :return: The hexadecimal digest string of the UTF-8 encoded summary text.
    """
    # Capture the summary in an in-memory buffer; stdout is restored even if summary() raises.
    with io.StringIO() as output:
        with contextlib.redirect_stdout(output):
            model.summary()
        summary_text = output.getvalue()

    return hashlib.sha256(summary_text.encode('utf-8')).hexdigest()


def set_tensorflow_seeds(seed: int):
    """
    Sets all the random seeds used by all the various pieces.
    :param seed: A random seed to use. Can not be None.
    """
    jb_utils.set_seeds(seed)
    logger.info(f"Setting TensorFlow seed to: {str(seed)}")
    tf.random.set_seed(seed)

# --------------------------------------------------------------------------------
# /test/test_tabular_data_set.py:
# --------------------------------------------------------------------------------
#!
/usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
#
# DM22-0856
#
# ======================================================================================================================

import csv
import math

import juneberry.data
import juneberry.pytorch.tabular_dataset as tabular


def make_sample_csv(tmp_path, filename, content):
    """
    Write the rows in 'content' to a comma separated file.
    :param tmp_path: Directory (a Path) in which to create the file.
    :param filename: Name of the file to create inside tmp_path.
    :param content: Iterable of rows, where each row is an iterable of values.
    :return: Nothing.
    """
    # The csv module asks for newline='' so that it controls line endings itself;
    # without it, platforms that translate '\n' produce blank rows between records.
    with open(tmp_path / filename, 'w', newline='') as csv_file:
        csv.writer(csv_file, delimiter=',').writerows(content)


def test_csv_loader(tmp_path):
    """Load two small CSV files into a TabularDataset and check every row and label."""
    header = ["col1", "col2", "col3"]
    make_sample_csv(tmp_path, "file1.csv", [header, [0.1, 0.2, 1], [0.3, 0.4, 0]])
    make_sample_csv(tmp_path, "file2.csv", [header, [0.5, 0.6, 0], [0.7, 0.8, 2]])

    labeled_data = juneberry.data.load_labeled_csvs([tmp_path / "file1.csv", tmp_path / "file2.csv"], 2)

    # Load the data and put it into the data set
    rows_labels = juneberry.data.flatten_dict_to_pairs(labeled_data)
    ds = tabular.TabularDataset(rows_labels, None)

    # Expected (first value, second value, label) for each sample, in data set order.
    expected = [
        (0.1, 0.2, 1),
        (0.3, 0.4, 0),
        (0.5, 0.6, 0),
        (0.7, 0.8, 2),
    ]

    assert 4 == len(ds)

    for index, (first, second, expected_label) in enumerate(expected):
        row, label = ds[index]
        assert math.isclose(row[0], first, rel_tol=1e-2)
        assert math.isclose(row[1], second, rel_tol=1e-2)
        assert label == expected_label

# --------------------------------------------------------------------------------
# /juneberry/scripting/training_sprout.py:
# --------------------------------------------------------------------------------
#!
#! /usr/bin/env python3

# ======================================================================================================================
# Juneberry - Release 0.5
#
# Copyright 2022 Carnegie Mellon University.
#
# NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
# BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
# INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
# FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
# FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
#
# Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
#
# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
# Copyright notice for non-US Government use and distribution.
#
# This Software includes and/or makes use of Third-Party Software each subject to its own license.
#
# DM22-0856
#
# ======================================================================================================================
from argparse import Namespace
from dataclasses import dataclass
import logging

from juneberry.scripting.sprout import Sprout

logger = logging.getLogger(__name__)


@dataclass()
class TrainingSprout(Sprout):
    """
    A Sprout specialised for training: on top of the base Sprout attributes it carries the
    execution mode, output format, and model selection arguments of a training script.
    """
    # ========== SCRIPT ARGS ==========
    # ===== EXECUTION MODE ARGS =====
    dryrun: bool = None
    num_gpus: int = None
    resume: bool = None

    # ===== OUTPUT FORMAT ARGS =====
    onnx: bool = None
    skip_native: bool = None

    # ===== MODEL ARGS =====
    model_name: str = None

    def grow_from_args(self, args: Namespace) -> None:
        """
        Populate this Sprout from a Namespace of script arguments.
        :param args: A Namespace of arguments, typically created by passing arguments to a Juneberry script.
        :return: Nothing.
        """
        # Let the base Sprout pick up its own attributes first.
        super().grow_from_args(args)

        # (attribute on this sprout, attribute name on the Namespace, value used when it is absent)
        training_args = (
            ("model_name", "modelName", None),
            ("num_gpus", "num_gpus", None),
            ("dryrun", "dryrun", False),
            ("resume", "resume", False),
            ("skip_native", "skipNative", False),
            ("onnx", "onnx", False),
        )
        for attribute, arg_name, fallback in training_args:
            setattr(self, attribute, getattr(args, arg_name, fallback))

# --------------------------------------------------------------------------------
# /juneberry/reporting/report.py:
# --------------------------------------------------------------------------------
#! /usr/bin/env python3

# ======================================================================================================================
# Juneberry - Release 0.5
#
# Copyright 2022 Carnegie Mellon University.
#
# NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
# BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
# INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
# FROM USE OF THE MATERIAL.
# CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
# FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
#
# Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
#
# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
# Copyright notice for non-US Government use and distribution.
#
# This Software includes and/or makes use of Third-Party Software each subject to its own license.
#
# DM22-0856
#
# ======================================================================================================================

import logging
from pathlib import Path

logger = logging.getLogger(__name__)


class Report:
    """
    This is the base class for all reports.
    """
    def __init__(self, output_str: str = ""):
        """
        Determine the report's output directory and make sure it exists.
        :param output_str: Path to an output directory, or to an output file (in which case the
        file's parent directory is used). An empty string selects the current working directory.
        """
        if output_str == "":
            # No output path was provided, so fall back to the current working directory.
            logger.warning("An output path for the report was not provided. Saving the report to the "
                           "current working directory.")
            self.output_dir = Path.cwd()
        else:
            self.output_dir = Path(output_str)

            # A "." in the final path component is taken to mean a file extension, i.e. the path
            # names a file and the report directory is that file's parent. Path.name is used rather
            # than parts[-1] because Path(".").parts is empty, which made "." raise an IndexError.
            # ".." is always a directory, so it must not be mistaken for a file name.
            final_component = self.output_dir.name
            if "." in final_component and final_component != "..":
                self.output_dir = self.output_dir.parent

        # Create the output directory (and any parent directories) if it does not exist.
        # exist_ok covers the directory being created by someone else after the check.
        if not self.output_dir.exists():
            self.output_dir.mkdir(parents=True, exist_ok=True)

    def create_report(self) -> None:
        """
        Creates the report file and writes it to the desired output file. The base class only
        logs a warning; subclasses are expected to override this.
        :return: Nothing
        """
        logger.warning("'create_report' is not implemented in the base Report class.")

# --------------------------------------------------------------------------------
# /juneberry/transforms/random_mirror_flip.py:
# --------------------------------------------------------------------------------
#! /usr/bin/env python3

# ======================================================================================================================
# Juneberry - Release 0.5
#
# Copyright 2022 Carnegie Mellon University.
#
# NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
# BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
# INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
# FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
# FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
#
# Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
#
# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
# Copyright notice for non-US Government use and distribution.
#
# This Software includes and/or makes use of Third-Party Software each subject to its own license.
#
# DM22-0856
#
# ======================================================================================================================

# """
# Simple transformer for mirroring or shifting an image.
# The JSON configuration accepts two arguments for chance to flip.
#
# "config": { "mirror_chance": 0.0, "flip_chance": 0.0 }
#
# """

import logging
import sys

import juneberry.image as jb_img_utils

logger = logging.getLogger(__name__)


class RandomMirrorFlip:
    """
    Transform that hands an image to juneberry.image.random_mirror_flip together with the
    configured mirror and flip chances.
    """

    def __init__(self, mirror_chance=None, flip_chance=None):
        """
        Store and validate the configured chances. Logs an error and exits the process (status -1)
        when neither chance is given or when a given chance lies outside 0 to 1 inclusive.
        :param mirror_chance: Chance to mirror the image, or None to leave it unset.
        :param flip_chance: Chance to flip the image, or None to leave it unset.
        """
        self.mirror_chance = mirror_chance
        self.flip_chance = flip_chance

        if self.mirror_chance is None and self.flip_chance is None:
            logger.error("Neither 'mirror_chance' nor 'flip_chance' was specified. EXITING.")
            sys.exit(-1)

        # Both chances obey the same rule, so validate them with one loop. The message pieces are
        # separated by spaces (they used to run together) and the range is written as inclusive,
        # matching the actual check.
        for name, chance in (("mirror_chance", self.mirror_chance), ("flip_chance", self.flip_chance)):
            if chance is not None and not 0 <= chance <= 1:
                logger.error(f"{name} must be a value in range [0,1]. "
                             f"{name} value was {chance}. "
                             f"EXITING.")
                sys.exit(-1)

    def __call__(self, image):
        """
        Transformation function that is provided a PIL image.
        :param image: The source PIL image.
        :return: The transformed PIL image.
        """
        # NOTE(review): one of the two chances may still be None here; confirm that
        # random_mirror_flip accepts None for an unset chance.
        return jb_img_utils.random_mirror_flip(image, self.mirror_chance, self.flip_chance)

# --------------------------------------------------------------------------------
# /docs/specs/rules_list_specification.md:
# --------------------------------------------------------------------------------
# Workflow Rules List
# ===============
#
# # Introduction
#
# This document describes the specification used by Juneberry when expressing a set of "build rules" to
# be used when building Juneberry experiments.
8 | 9 | # Schema 10 | 11 | ``` 12 | Rule based version 13 | { 14 | "description": "optional prose description", 15 | "format_version": "version string of the file format", 16 | "timestamp": "optional ISO timestamp of the last modification", 17 | "workflows": [ 18 | { 19 | "name": "", 20 | "rules": [ 21 | { 22 | "clean_extras": [ ], 23 | "id": 0, 24 | "doc": "short documentation string for the rule.", 25 | "inputs": [ ], 26 | "outputs": [ ], 27 | "command": [ ], 28 | "requirements": [ ] 29 | } 30 | ] 31 | } 32 | ] 33 | } 34 | ``` 35 | 36 | # Details 37 | This section provides the details of each of the fields. 38 | 39 | ## description 40 | **Optional** prose description of this rules list. 41 | 42 | ## format_version 43 | Linux-style version of the **format** of the file. Not the version of 44 | the data, but the version of the semantics of the fields of this file. 45 | The current version: 0.1.0 46 | 47 | ## timestamp 48 | **Optional** time stamp (ISO format) for when this config was last modified. 49 | 50 | ## workflows 51 | This section contains a list of workflows that can be performed for the experiment. 52 | 53 | ### name 54 | The name of this workflow. 55 | 56 | ### rules 57 | A list of rules that are to be performed to complete this workflow. The rules must be ordered such that, when executed 58 | in the order provided, all the prerequisite inputs will be generated for subsequent rules. However, not every “rules” entry 59 | will necessarily require every previous “rules” entry. Thus, the order can be a **depth-first** or **breadth-first** 60 | representation of the dependencies. 61 | 62 | #### clean_extras 63 | Sometimes after a task we want to clean additional files that aren't necessarily known 64 | ahead of time, so they cannot be listed as explicit targets. This property can be used 65 | to list glob patterns such as `*.png` for finding additional pieces to clean. 66 | 67 | #### id 68 | A unique id of the rule. 69 | 70 | #### doc 71 | A short documentation string to display when listing rules or for log output. 
72 | 73 | #### inputs 74 | A list of all input files that are required to execute this rule. 75 | 76 | #### outputs 77 | A list of outputs that are generated by this rule. 78 | 79 | #### command 80 | The command required to execute this rule. 81 | 82 | #### requirements 83 | A list of immediate pre-requisite rule ids required by this rule. 84 | 85 | 86 | # Copyright 87 | 88 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms. 89 | -------------------------------------------------------------------------------- /juneberry/config/report.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
#
# DM22-0856
#
# ======================================================================================================================

import logging
import sys

from prodict import List, Prodict

from juneberry.config.plugin import Plugin
import juneberry.config.util as jb_conf_utils
import juneberry.filesystem as jb_fs

logger = logging.getLogger(__name__)


class ReportConfig(Prodict):
    """Config object describing a list of report plugins."""
    FORMAT_VERSION = '0.1.0'
    SCHEMA_NAME = 'report_schema.json'
    reports: List[Plugin]

    @staticmethod
    def construct(data: dict, file_path: str = None):
        """
        Validate 'data' against the report schema and build a ReportConfig from it.
        Logs an error and exits the process (status -1) when validation fails.
        :param data: The data to use to construct the object.
        :param file_path: Optional path to the file the data came from. Only used in the log message.
        :return: A constructed and validated object.
        """
        schema_ok = jb_conf_utils.validate_schema(data, ReportConfig.SCHEMA_NAME)
        if not schema_ok:
            logger.error(f"Validation errors in ReportConfig from {file_path}. See log. Exiting.")
            sys.exit(-1)

        # The data passed validation, so build the config object from it.
        return ReportConfig.from_dict(data)

    @staticmethod
    def load(data_path: str):
        """
        Read the config file at 'data_path', then validate and construct the config from it.
        :param data_path: Path to config.
        :return: Loaded, validated, and constructed object.
        """
        logger.info(f"Loading REPORT CONFIG from {data_path}")
        raw_data = jb_fs.load_file(data_path)
        return ReportConfig.construct(raw_data, data_path)

# --------------------------------------------------------------------------------
# /test/metrics/classification/test_classification_metrics.py:
# --------------------------------------------------------------------------------
#! /usr/bin/env python3

# ======================================================================================================================
# Juneberry - Release 0.5
#
# Copyright 2022 Carnegie Mellon University.
#
# NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
# BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
# INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
# FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
# FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
#
# Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
#
# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
# Copyright notice for non-US Government use and distribution.
#
# This Software includes and/or makes use of Third-Party Software each subject to its own license.
#
# DM22-0856
#
# ======================================================================================================================

import json
from pathlib import Path
from typing import List

import numpy
import pytest
import torch

import juneberry.metrics.classification.metrics_manager as mm
from juneberry.config.model import Plugin

# The metrics configuration lives in the "data" directory next to this test file.
test_data_dir = Path(__file__).resolve().parent / "data"

config_filename = test_data_dir / "config_classification.json"

with open(config_filename, 'r') as f:
    config_data = json.load(f)

# One Plugin per entry of the "metrics" list in the config.
metrics_plugins: List[Plugin] = [Plugin.from_dict(entry) for entry in config_data["metrics"]]

target = torch.tensor([0, 1, 2])
preds = torch.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]])

# metrics plugins take numpy inputs
with torch.set_grad_enabled(False):
    preds_np = preds.cpu().numpy()
    target_np = target.cpu().detach().numpy()

# Evaluate every configured metric once; the tests below each inspect one entry of the result.
metrics_mgr = mm.MetricsManager(metrics_plugins)
metrics = metrics_mgr(target_np, preds_np, binary=False)


def approx(expected_val):
    """Wrap a value in pytest.approx with an absolute tolerance of 5e-3."""
    return pytest.approx(expected_val, abs=5e-3)


def test_torchmetrics_functional():
    """Check the value stored under 'func_accuracy'."""
    expected = numpy.array(0.6666667, dtype=numpy.float32)
    assert numpy.equal(metrics["func_accuracy"], expected)


def test_torchmetrics_classbased():
    """Check the value stored under 'obj_accuracy'."""
    expected = numpy.array(0.6666667, dtype=numpy.float32)
    assert numpy.equal(metrics["obj_accuracy"], expected)


def test_torchnn():
    """Check the value stored under 'loss'."""
    expected = numpy.array(1.3038288, dtype=numpy.float32)
    assert numpy.equal(metrics["loss"], expected)


def test_sklearn_metrics():
    """Check the value stored under 'accuracy_score'."""
    assert metrics["accuracy_score"] == 0


def test_tensorflow_classbased():
    """Check the value stored under 'tf_accuracy'."""
    assert metrics["tf_accuracy"] == 3.0


def test_tensorflow_functional():
    """Check the value stored under 'tf_binary_accuracy', within the approx() tolerance."""
    assert metrics["tf_binary_accuracy"] == approx(0.33333334)
# --------------------------------------------------------------------------------
# /docker/databricks/monit:
# --------------------------------------------------------------------------------
#!/bin/sh

### BEGIN INIT INFO
# Provides: monit
# Required-Start: $remote_fs
# Required-Stop: $remote_fs
# Should-Start: $all
# Should-Stop: $all
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: service and resource monitoring daemon
# Description: monit is a utility for managing and monitoring
# processes, programs, files, directories and filesystems
# on a Unix system. Monit conducts automatic maintenance
# and repair and can execute meaningful causal actions
# in error situations.
### END INIT INFO

# Init script for the monit daemon. Supported actions:
# start, stop, reload, restart, force-reload, syntax, status.

# Abort the script as soon as an unchecked command fails.
set -e

# Load the LSB helper functions; the log_* and status_of_proc calls below
# are presumably defined there (not visible in this file).
. /lib/lsb/init-functions

DAEMON=/usr/bin/monit
CONFIG=/etc/monit/monitrc
NAME=monit
DESC="daemon monitor"
MONIT_OPTS=
PID="/run/$NAME.pid"

# Check if DAEMON binary exist
[ -f $DAEMON ] || exit 0

# Optional local settings; this is where START (checked in monit_checks)
# and extra MONIT_OPTS are expected to come from.
[ -f "/etc/default/$NAME" ] && . /etc/default/$NAME

# Always pass the config file, followed by any options set above.
MONIT_OPTS="-c $CONFIG $MONIT_OPTS"

# Print a configuration hint (unless the requested action is "stop") and
# exit successfully without touching the daemon.
#   $1 - the action the script was invoked with
monit_not_configured () {
  if [ "$1" != "stop" ]
  then
    printf "\tplease configure $NAME and then edit /etc/default/$NAME\n"
    printf "\tand set the \"START\" variable to \"yes\" in order to allow\n"
    printf "\t$NAME to start\n"
  fi
  exit 0
}

# Gate for starting the daemon: does not return unless START is "yes".
#   $1 - the action the script was invoked with
monit_checks () {
  # Check if START variable is set to "yes", if not we exit.
  if [ "$START" != "yes" ]
  then
    monit_not_configured $1
  fi
}

case "$1" in
  start)
    # The "Starting" message is logged before the START check, so a
    # non-configured system prints it, then the hint, then exits 0.
    log_daemon_msg "Starting $DESC" "$NAME"
    monit_checks $1
    if start-stop-daemon --start --quiet --oknodo --pidfile $PID --exec $DAEMON -- $MONIT_OPTS 1>/dev/null
    then
      log_end_msg 0
    else
      log_end_msg 1
    fi
    ;;
  stop)
    log_daemon_msg "Stopping $DESC" "$NAME"
    # Retry schedule: send TERM, wait up to 5s, then KILL, wait up to 5s.
    if start-stop-daemon --retry TERM/5/KILL/5 --oknodo --stop --quiet --pidfile $PID 1>/dev/null
    then
      log_end_msg 0
    else
      log_end_msg 1
    fi
    ;;
  reload)
    log_daemon_msg "Reloading $DESC configuration" "$NAME"
    # --stop with --signal HUP delivers SIGHUP to the process from the pid
    # file instead of the default termination signal.
    if start-stop-daemon --stop --signal HUP --quiet --oknodo --pidfile $PID --exec $DAEMON -- $MONIT_OPTS 1>/dev/null
    then
      log_end_msg 0
    else
      log_end_msg 1
    fi
    ;;
  restart|force-reload)
    log_daemon_msg "Restarting $DESC" "$NAME"
    # The stop step is not wrapped in an "if", so under "set -e" a failing
    # stop aborts the script before the start step. Unlike "start", the
    # START check (monit_checks) is not performed here.
    start-stop-daemon --retry TERM/5/KILL/5 --oknodo --stop --quiet --pidfile $PID 1>/dev/null
    if start-stop-daemon --start --quiet --oknodo --pidfile $PID --exec $DAEMON -- $MONIT_OPTS 1>/dev/null
    then
      log_end_msg 0
    else
      log_end_msg 1
    fi
    ;;
  syntax)
    # Run the monit binary itself with -t against the configured file.
    $DAEMON $MONIT_OPTS -t
    ;;
  status)
    status_of_proc -p $PID $DAEMON $NAME
    ;;
  *)
    # Unknown or missing action: print usage; the script still exits 0 below.
    log_action_msg "Usage: /etc/init.d/$NAME {start|stop|reload|restart|force-reload|syntax|status}"
    ;;
esac

exit 0

# --------------------------------------------------------------------------------
# /juneberry/pytorch/tabular_dataset.py:
# --------------------------------------------------------------------------------
#! /usr/bin/env python3

# ======================================================================================================================
# Juneberry - Release 0.5
#
# Copyright 2022 Carnegie Mellon University.
#
# NO WARRANTY.
# THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
# BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
# INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
# FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
# FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
#
# Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
#
# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
# Copyright notice for non-US Government use and distribution.
#
# This Software includes and/or makes use of Third-Party Software each subject to its own license.
#
# DM22-0856
#
# ======================================================================================================================

import numpy as np

from juneberry.pytorch.utils import EpochDataset


class TabularDataset(EpochDataset):
    """
    Data set over (row, label) pairs of tabular data, e.g. rows loaded from a list of CSV files.
    Every row is converted to a list of floats up front; transforms, if any, are applied each
    time an item is fetched.
    """

    def __init__(self, rows_labels, transforms=None):
        """
        Initialize the tabular data set loader.
        :param rows_labels: An iterable of pairs of the row data and labels.
        :param transforms: Any transforms to be applied to each row of floats per epoch.
        """
        super().__init__()

        self.transforms = transforms
        self.rows_labels = []

        # Validate and convert in a single pass. The previous implementation looped over the input
        # twice (once to validate, once to convert), so a one-shot iterable such as a generator was
        # exhausted by the validation loop and silently produced an empty data set.
        for item in rows_labels:
            assert len(item) == 2, "Each entry of rows_labels must be a (row, label) pair."
            row, label = item

            # Pre-process each row to floats so it is ready for transformation.
            self.rows_labels.append([[float(x) for x in row], label])

    def __len__(self):
        """ :return: Total number of samples. """
        return len(self.rows_labels)

    def __getitem__(self, index):
        """
        Return one item.
        :param index: The index within the data set.
        :return: One transformed item with label. The row is a float32 numpy array.
        """
        row, label = self.rows_labels[index]

        if self.transforms is not None:
            # Hand the transforms a copy so the stored row is never modified in place.
            row = row.copy()
            # self.epoch is not set in this class; presumably EpochDataset maintains it.
            args = {'label': label, 'index': index, 'epoch': self.epoch}
            row, label = self.transforms(row, **args)

        # They want a row as float
        row = np.array(row).astype(np.float32)

        return row, label

# --------------------------------------------------------------------------------
# /LICENSE.txt:
# --------------------------------------------------------------------------------
# Juneberry - Release 0.5
#
# Copyright 2022 Carnegie Mellon University.
#
# BSD (SEI)
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
# following disclaimer in the documentation and/or other materials provided with the distribution.
# 3. Products derived from this software may not include “Carnegie Mellon University,” "SEI” and/or “Software
# Engineering Institute" in the name of such derived product, nor shall “Carnegie Mellon University,” "SEI”
# and/or “Software Engineering Institute" be used to endorse or promote products derived from this software
# without prior written permission. For written permission, please contact permission@sei.cmu.edu.
17 | 18 | ACKNOWLEDGMENTS AND DISCLAIMERS: 19 | Juneberry - Release 0.5 includes and/or can make use of certain third party software ("Third Party Software"). The 20 | Third Party Software that is used by Juneberry - Release 0.5 is dependent upon your system configuration, but 21 | typically includes the software identified in the documentation and/or ReadMe files. By using Juneberry - Release 0.5, 22 | you agree to comply with any and all relevant Third Party Software terms and conditions contained in any such Third 23 | Party Software or separate license file distributed with such Third Party Software. The parties who own the Third Party 24 | Software ("Third Party Licensors") are intended third party beneficiaries to this License with respect to the terms 25 | applicable to their Third Party Software. Third Party Software licenses only apply to the Third Party Software and not 26 | any other portion of Juneberry - Release 0.5 or Juneberry - Release 0.5 as a whole. 27 | 28 | This material is based upon work funded and supported by the Department of Defense under Contract No. FA8702-15-D-0002 29 | with Carnegie Mellon University for the operation of the Software Engineering Institute, a federally funded research 30 | and development center. 31 | 32 | The view, opinions, and/or findings contained in this material are those of the author(s) and should not be construed 33 | as an official Government position, policy, or decision, unless designated by other documentation. 34 | 35 | NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 36 | BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 37 | INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 38 | FROM USE OF THE MATERIAL. 
CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 39 | FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 40 | 41 | [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 42 | Copyright notice for non-US Government use and distribution. 43 | 44 | DM22-0856 -------------------------------------------------------------------------------- /juneberry/metrics/classification/torchmetrics/metrics.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
class Metrics(MetricsBase):
    """
    Juneberry metrics plugin that evaluates a torchmetrics metric identified by a fully
    qualified name (fqn). The fqn may refer to either the functional or the class-based
    flavor of a metric.
    """

    def __init__(self,
                 fqn: str,
                 name: str,
                 kwargs: Dict = None) -> None:
        """
        :param fqn: Fully qualified name of the torchmetrics function or class to use.
        :param name: The name used to refer to this metric (it appears in error messages).
        :param kwargs: Optional keyword arguments forwarded to the metric.
        """
        super().__init__(fqn, name, kwargs)

    def __call__(self, target, preds, binary):
        """
        Compute the metric for a set of predictions.
        :param target: The ground truth labels; converted to a torch.LongTensor.
        :param preds: The predictions; converted to a torch.FloatTensor.
        :param binary: Not used by this implementation (NOTE(review): presumably kept for
        signature compatibility with other metrics plugins - confirm).
        :return: The metric result converted to numpy.
        :raises ValueError: If neither a function nor a class instance could be created from the fqn.
        """
        target_tensor = torch.LongTensor(target)
        preds_tensor = torch.FloatTensor(preds)

        # Torchmetrics has class-based and functional versions of its metrics.
        # First try to resolve the fqn as a function (verified against placeholder "preds" and
        # "target" arguments plus the user kwargs); fall back to constructing a class instance.
        probe_kwargs = {"preds": [], "target": [], **self.kwargs}
        metric = load_verify_fqn_function(self.fqn, probe_kwargs) or construct_instance(self.fqn, self.kwargs)

        # Still nothing? Then neither flavor of the metric could be created.
        if not metric:
            log_msg = f"Unable to create metrics function: fqn={self.fqn}, name={self.name}, kwargs={self.kwargs}."
            logger.error(log_msg)
            raise ValueError(log_msg)

        if inspect.isfunction(metric):
            # The functional flavor takes its options on every call.
            result = metric(preds_tensor, target_tensor, **self.kwargs)
        else:
            # The class flavor already received its options when it was constructed.
            result = metric(preds_tensor, target_tensor)
        return result.numpy()
import setuptools

# Optional dependency groups, installable via `pip install Juneberry[<group>]`.
extras = {
    'tf': ['tensorflow', 'tensorflow-datasets'],
    'torch': ['torch', 'torchvision', "torch-summary>=1.4.5", "torchmetrics"],
    'onnx': ['protobuf==3.16.0', 'onnx', 'onnxruntime', 'tf2onnx'],
    'onnx-gpu': ['protobuf==3.16.0', 'onnx', 'onnxruntime-gpu', 'tf2onnx'],
    'opacus': ['opacus']
}

# Convenience groups: everything, with either the CPU or the GPU flavor of the ONNX runtime.
extras['all'] = extras['tf'] + extras['torch'] + extras['onnx'] + extras['opacus']
extras['all-gpu'] = extras['tf'] + extras['torch'] + extras['onnx-gpu'] + extras['opacus']

# Packages that are always required. Each requirement is listed exactly once
# ("hjson" was previously listed twice).
install_requires = [
    "doit",
    "numpy",
    "pycocotools",
    "matplotlib",
    "pillow",
    "prodict",
    "hjson",
    "jsonschema",
    "scikit-learn",
    "tqdm",
    "tensorboard",
    "pandas",
    "brambox",
    "pyyaml",
    "natsort",
    "ray",
    "jsonpath-ng"
]

# Command line entry points that are installed as scripts.
bin_scripts = [
    'bin/jb_attack_to_rules',
    'bin/jb_clean_experiment_evals',
    'bin/jb_evaluate',
    'bin/jb_experiment_to_rules',
    'bin/jb_generate_experiments',
    'bin/jb_generate_watermark_eval',
    'bin/jb_gpu_runner',
    'bin/jb_process_dataset',
    'bin/jb_report',
    'bin/jb_rules_to_pydoit',
    'bin/jb_run_experiment',
    'bin/jb_run_plugin',
    'bin/jb_train',
    'bin/jb_tune'
]

setuptools.setup(
    name='Juneberry',
    version='0.5.1',
    description='Juneberry Machine Learning Experiment Manager',
    packages=setuptools.find_packages(),
    install_requires=install_requires,
    scripts=bin_scripts,
    python_requires='>=3.7',
    include_package_data=True,
    extras_require=extras
)
def run_plugin(plugin: str):
    """
    Build the plugin described by a JSON file and invoke it.
    :param plugin: A string describing the location in the filesystem of the JSON file
    containing the plugin information. The file must provide an 'fqcn' key and may provide
    a 'kwargs' key; missing kwargs are treated as an empty dictionary.
    :return: Nothing. The process exits with -1 if the 'fqcn' key is missing.
    """
    # Retrieve the data from the JSON file.
    plugin_config = load_json(plugin)

    # Without a class name there is nothing to construct, so bail out early.
    if 'fqcn' not in plugin_config:
        logger.error("Expected key 'fqcn' in target plugin JSON file was not found. Exiting.")
        sys.exit(-1)

    # kwargs are optional in the file.
    plugin_config.setdefault('kwargs', {})

    # Construct an instance of the class described in the plugin file, then call it (run the plugin).
    plugin_obj = jb_loader.construct_instance(plugin_config['fqcn'], plugin_config['kwargs'])
    plugin_obj()
THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 20 | # 21 | # DM22-0856 22 | # 23 | # ====================================================================================================================== 24 | 25 | """ 26 | This script cleans all of the predictions files produced by jb_evaluate. 27 | 28 | """ 29 | 30 | import argparse 31 | import logging 32 | import subprocess 33 | 34 | from juneberry.config.rule_list import RulesList 35 | from juneberry.filesystem import ExperimentManager 36 | import juneberry.scripting.utils as jb_scripting 37 | 38 | logger = logging.getLogger("juneberry.jb_clean_experiment_evals") 39 | 40 | 41 | def setup_args(parser) -> None: 42 | """ 43 | Adds arguments to the parser 44 | :param parser: The parser in which to add arguments. 
def main():
    """
    Entry point: runs "doit ... clean <task>" for every jb_evaluate rule found in the
    rules file of the named experiment.
    """
    parser = argparse.ArgumentParser(description="Cleans the eval directories in an experiment.")
    setup_args(parser)
    jb_scripting.setup_args(parser)
    args = parser.parse_args()

    experiment_manager = ExperimentManager(args.experimentName)
    lab = jb_scripting.setup_workspace(
        args,
        log_file=experiment_manager.get_log_path(),
        log_prefix="<> ",
        banner_msg=f">>> Juneberry Experiment Eval Cleaner - {args.experimentName} <<<")

    workspace_root = lab.workspace()
    rules_file = experiment_manager.get_experiment_rules()
    dodo_file = experiment_manager.get_experiment_dodo(workflow="main")
    rules = RulesList.load(rules_file)

    # Collect the id of every rule, across all workflows, whose command is jb_evaluate.
    eval_task_ids = [rule.id
                     for workflow in rules.workflows
                     for rule in workflow.rules
                     if rule.command[0] == "jb_evaluate"]

    # Ask doit to clean each of those tasks, one invocation per task.
    for task_id in eval_task_ids:
        subprocess.run(["doit", "-f", dodo_file, "--dir", workspace_root, "clean", str(task_id)])

    logger.info("jb_clean_experiment_evals is done.")
class TestCocoAnno(unittest.TestCase):
    """Unit tests for constructing CocoAnnotations objects from raw dictionaries."""

    def test_config_basics(self):
        """A valid config constructs and keeps all of its images and annotations."""
        config = make_basic_config()
        coco_anno = CocoAnnotations.construct(config)
        self.assertEqual(len(config['images']), len(coco_anno['images']))
        self.assertEqual(len(config['annotations']), len(coco_anno['annotations']))

    def test_duplicate_images(self):
        """A config containing the same image twice is rejected with an error log and SystemExit."""
        config = make_basic_config()
        config['images'].append(config['images'][0])

        # assertLogs has to be the OUTER context. If assertRaises is the outer one, the SystemExit
        # propagates through assertLogs, which then skips its own check, and any statement placed
        # after the raising call inside the with-block is never executed.
        with self.assertLogs(level='ERROR') as log:
            with self.assertRaises(SystemExit):
                CocoAnnotations.construct(config)

        # Checked outside the with-blocks so the assertion actually runs.
        message = "Found duplicate image id: id= '122214'."
        self.assertIn(message, log.output[0])
"license": { "type": "integer" }, 57 | "flickr_url": { "type": "string" }, 58 | "coco_url": { "type": "string" }, 59 | "date_captured": { "type": "string" } 60 | }, 61 | "required": [ 62 | "id", 63 | "width", 64 | "height", 65 | "file_name" 66 | ] 67 | } 68 | }, 69 | "annotations": { 70 | "type": "array", 71 | "items": { 72 | "type": "object", 73 | "properties": { 74 | "id": { "type": "integer" }, 75 | "image_id": { "type": "integer" }, 76 | "category_id": { "type": "integer" }, 77 | "segmentation": {}, 78 | "area": { "type": "number" }, 79 | "bbox": { 80 | "type": "array", 81 | "items": { "type": "number" } 82 | }, 83 | "iscrowd": {"type": "integer"}, 84 | "score": { "type": "number" } 85 | }, 86 | "required": [ 87 | "id", 88 | "image_id", 89 | "category_id" 90 | ] 91 | } 92 | } 93 | }, 94 | "required": [ 95 | "categories", 96 | "images" 97 | ] 98 | } 99 | -------------------------------------------------------------------------------- /scripts/reformat_predictions.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 
def reformat_data(manifest, pred):
    """
    Merge the eval manifest into the predictions structure.

    The manifest and predictions are in the same order, so the images are walked by index and a
    new per-image entry of the form {"path", "label", "predictions"} is produced for each one.
    The input structures are left untouched; a new top-level dictionary with a new 'results'
    dictionary is returned.
    :param manifest: List of manifest entries, each with a 'path' and a 'label'.
    :param pred: Predictions structure with 'labels' and 'predictions' lists under 'results'.
    :return: A copy of pred in which results['labels'] is removed and results['predictions']
    holds the new per-image entries.
    :raises AssertionError: If a manifest label disagrees with the label in the predictions.
    """
    results = pred['results']
    pred_labels = results['labels']
    pred_preds = results['predictions']

    new_pred = []
    for idx, item in enumerate(manifest):
        # Double check the label
        assert item['label'] == pred_labels[idx], \
            f"Label mismatch at index {idx}: manifest={item['label']}, predictions={pred_labels[idx]}"

        # Make a new entry
        new_pred.append({
            "path": item['path'],
            "label": item['label'],
            "predictions": pred_preds[idx]
        })

    # Build a fresh 'results' dictionary. pred.copy() is only a shallow copy, so editing
    # new_out['results'] in place would also strip 'labels' from the caller's data.
    new_results = {key: value for key, value in results.items() if key != 'labels'}
    new_results['predictions'] = new_pred

    new_out = pred.copy()
    new_out['results'] = new_results

    return new_out
def main():
    """Parse the eval directory from the command line and reformat the predictions found there."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("eval_dir", help="Path to directory with predictions and manifest.")
    parsed = arg_parser.parse_args()

    reformat_file(parsed.eval_dir)
class Hashes(Prodict):
    """
    Config object for a hashes file. Content is validated against the hashes schema both
    when it is constructed and when it is saved.
    """
    FORMAT_VERSION = '0.3.0'
    SCHEMA_NAME = 'hashes_schema.json'

    model_architecture: str

    @staticmethod
    def construct(data: dict, file_path: str = None):
        """
        Check the data against the schema and build a Hashes object from it.
        :param data: The data to use to construct the object.
        :param file_path: Optional path to a file that may have been loaded. Used for logging.
        :return: A constructed object. The process exits with -1 if validation fails.
        """
        # Validate with our schema
        is_valid = jb_conf_utils.validate_schema(data, Hashes.SCHEMA_NAME)
        if is_valid:
            # Finally, construct the object
            return Hashes.from_dict(data)

        logger.error(f"Validation errors in Hashes object from {file_path}. See log. Exiting!")
        sys.exit(-1)

    @staticmethod
    def load(data_path: str):
        """
        Read the file at the provided path, then validate it and construct the config.
        :param data_path: Path to config.
        :return: Loaded, validated, and constructed object.
        """
        logger.info(f"Loading HASHES CONFIG from {data_path}")

        # Load the raw file, then validate and construct the model.
        raw_data = jb_fs.load_file(data_path)
        return Hashes.construct(raw_data, data_path)

    def to_json(self):
        """ :return: A pure dictionary version suitable for serialization to json."""
        plain_dict = jb_conf_utils.prodict_to_dict(self)
        return plain_dict

    def save(self, data_path: str) -> None:
        """
        Validate this object against the hashes schema and write it to the specified resource path.
        :param data_path: The path to the resource.
        :return: None
        """
        serializable = self.to_json()
        jb_conf_utils.validate_and_save_json(serializable, data_path, Hashes.SCHEMA_NAME)
def setup_args(parser) -> None:
    """
    Registers this script's own arguments: the required positional annotations file and the
    optional output directory.
    :param parser: The parser in which to add arguments.
    """
    annotations_help = ("COCO annotations file describing both the raw images, and the bounding boxes around "
                        "the objects that were detected in each image. ")
    output_dir_help = ('An optional output directory where the image results will be saved. When this argument '
                       'is not provided, the images will be saved to the current working directory in a '
                       'directory named "boxed_imgs".')

    parser.add_argument('annotationsFile', help=annotations_help)
    parser.add_argument('-o', '--outputDir', help=output_dir_help)
69 | coco_utils.generate_bbox_images(anno_file, lab, args.outputDir) 70 | 71 | logger.info("Done.") 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /scripts/coco_image_use.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
def show_uses(coco_path, file_name):
    """
    Log the first image entry in a COCO file whose file name matches the one requested.
    An entry matches when the requested name equals either the entry's full file_name or just
    the final path component of it.
    :param coco_path: Path to the COCO annotations file to search.
    :param file_name: Image filename to search for.
    :return: Nothing. The match (or a "not found" message) is written to the log.
    """
    logger.info(f"Searching {coco_path} for {file_name}...")

    # The annotations are flattened into a merged per-image list, which makes the scan trivial.
    image_entries = coco_utils.load_from_json_file(coco_path).to_image_list()

    for image_entry in image_entries:
        matches_full_name = file_name == image_entry.file_name
        matches_base_name = file_name == Path(image_entry.file_name).name
        if matches_full_name or matches_base_name:
            logger.info(json.dumps(image_entry, indent=4))
            # Only the first match is reported.
            break
    else:
        logger.info(f" {file_name} was not found in {coco_path}")
70 | show_uses(model_manager.get_training_data_manifest_path(), args.file_name) 71 | show_uses(model_manager.get_validation_data_manifest_path(), args.file_name) 72 | 73 | if args.evals: 74 | logger.info("Scanning eval dirs.") 75 | for eval_dir in model_manager.iter_eval_dirs(): 76 | show_uses(eval_dir.get_manifest_path(), args.file_name) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /juneberry/schemas/evaluation_output_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft-07/schema", 3 | "type": "object", 4 | "properties": { 5 | "format_version": { "type": "string" }, 6 | "options": { 7 | "type": "object", 8 | "properties": { 9 | "dataset": { 10 | "type": "object", 11 | "properties": { 12 | "classes": { "type": "object" }, 13 | "config": { "type": "string" }, 14 | "histogram": { "type": "object" } 15 | }, 16 | "required": [ "config" ] 17 | }, 18 | "model": { 19 | "type": "object", 20 | "properties": { 21 | "hash": { "type": "string" }, 22 | "name": { "type": "string" }, 23 | "num_classes": { "type": "number" } 24 | }, 25 | "required": [ "name" ] 26 | } 27 | }, 28 | "required": [ "dataset", "model" ] 29 | }, 30 | "results": { 31 | "type": "object", 32 | "properties": { 33 | "classifications": { 34 | "type": "array", 35 | "items": { 36 | "type": "object", 37 | "properties": { 38 | "file": { "type": "string" }, 39 | "actual_label": { "type": "number" }, 40 | "actual_label_name": { "type": "string" }, 41 | "predicted_classes": { 42 | "type": "array", 43 | "items": { 44 | "type": "object", 45 | "properties": { 46 | "label": { "type": "number" }, 47 | "label_name": { "type": "string" }, 48 | "confidence": { "type": "number" } 49 | } 50 | } 51 | } 52 | } 53 | } 54 | }, 55 | "labels": { 56 | "type": "array", 57 | "items": { "type": "number" } 58 | }, 59 | "metrics": { 60 | "type": 
"object", 61 | "properties": { 62 | "classification": { "type": "object" }, 63 | "bbox": { "type": "object" }, 64 | "bbox_per_class": { "type": "object" } 65 | }, 66 | "required": [ ] 67 | }, 68 | "predictions": { 69 | "type": "array", 70 | "items": { 71 | "type": "array", 72 | "items": { "type": "number" } 73 | } 74 | } 75 | }, 76 | "required": [ ] 77 | }, 78 | "times": { 79 | "type": "object", 80 | "properties": { 81 | "duration": { "type": "number" }, 82 | "end_time": { "type": "string" }, 83 | "start_time": { "type": "string" } 84 | }, 85 | "required": [ ] 86 | } 87 | }, 88 | "required": [ 89 | "options", 90 | "results" 91 | ] 92 | } 93 | -------------------------------------------------------------------------------- /juneberry/transforms/image.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 
class ConvertMode:
    """
    Converts the mode of the input image to the specified mode.
    "kwargs": { "mode": 'RGB' }
    """

    def __init__(self, mode):
        """
        :param mode: The target image mode (e.g. 'RGB') the transform should produce.
        """
        self.mode = mode

    def __call__(self, image):
        """
        :param image: The image to convert; it must expose `mode` and `convert()`.
        :return: The input image itself when it is already in the target mode, otherwise the
        result of converting it to the target mode.
        """
        if image.mode != self.mode:
            return image.convert(self.mode)

        return image


class ResizePad:
    """
    Resizes the image maintaining aspect ratio, padding with the specified color if necessary.

    NOTE: This uses Image.ANTIALIAS resampling.

    "kwargs": { "width": 224, "height": 224, "pad_color": [ 0,0,0 ] }
    """

    def __init__(self, width, height, pad_color=(0, 0, 0)):
        """
        :param width: The width to hand to the resize helper.
        :param height: The height to hand to the resize helper.
        :param pad_color: The color used for any padding. Note the keyword is 'pad_color'; it is
        stored on the instance as 'color'.
        """
        self.width = width
        self.height = height
        self.color = pad_color

    def __call__(self, image):
        """
        :param image: The image to resize.
        :return: Whatever juneberry.image.resize_image returns for this image and configuration.
        """
        return jb_img_utils.resize_image(image, self.width, self.height, self.color)


class ChangeAllLabelsTo:
    """
    Replaces the label of every input with one fixed label, leaving the image untouched.
    """

    def __init__(self, label):
        """
        :param label: The label that will be returned for every input.
        """
        self.label = label

    def __call__(self, image, label):
        """
        :param image: The image, which is passed through unchanged.
        :param label: The original label, which is ignored.
        :return: A tuple of (image, configured label).
        """
        return image, self.label


class Watermark:
    """
    Inserts a (transformed) copy of a watermark image into the input image at a random position.
    """

    def __init__(self, watermark_path, min_scale=1.0, max_scale=1.0, rotation=0, blur=0):
        """
        :param watermark_path: Path to the watermark image file.
        :param min_scale: Lower bound of the scale pair passed to the watermark transform helper.
        :param max_scale: Upper bound of the scale pair passed to the watermark transform helper.
        :param rotation: Rotation value passed through to the watermark transform helper.
        :param blur: Blur value passed through to the watermark transform helper.
        """
        # NOTE: Image.open() is lazy. copy() forces the pixel data to load and gives us an image
        # that no longer depends on the file, so the file handle can be released right away.
        with Image.open(watermark_path) as source:
            self.watermark = source.copy()
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.rotation = rotation
        self.blur = blur

    def __call__(self, image):
        """
        :param image: The image to receive the watermark.
        :return: The image returned by juneberry.image.insert_watermark_at_position.
        """
        # Copy the watermark so we can munge it without touching the cached original.
        tmp_img: Image.Image = self.watermark.copy()

        # Transform watermark
        tmp_img = jb_img_utils.transform_image(tmp_img, (self.min_scale, self.max_scale), self.rotation, self.blur)

        # Insert at a random location
        x, y = jb_img_utils.make_random_insert_position(tmp_img.size, image.size)
        image = jb_img_utils.insert_watermark_at_position(image, tmp_img, (x, y))

        return image
class TensorBoardManager:
    """
    Responsible for logging data for TensorBoard.
    """

    # Layout for TensorBoard's "custom scalars" dashboard: {category: {chart title: [type, tags]}}.
    # The tags listed here must match the tags written by update() exactly (including case);
    # otherwise the charts have nothing to display.
    CUSTOM_SCALAR_LAYOUT = {
        'Accuracy': {
            'accuracy': ['Multiline', ['Accuracy/combined', 'Accuracy/train', 'Accuracy/val']]
        },
        'Learning Rate': {
            'learning rate': ['Multiline', ['Learning Rate']]
        },
        'Loss': {
            'loss': ['Multiline', ['Loss/combined', 'Loss/train', 'Loss/val']]
        }
    }

    def __init__(self, tb_root, model_manager):
        """
        Creates the summary writer for this run and registers the custom scalars layout.
        :param tb_root: The root directory for TensorBoard output.
        :param model_manager: The model manager; it is asked to produce the name of the log
        directory to use underneath tb_root.
        """
        self.tensorboard_root = tb_root
        self.log_dir = model_manager.create_tensorboard_directory_name(tb_root)
        self.summary_writer = SummaryWriter(log_dir=self.log_dir)
        self.summary_writer.add_custom_scalars(self.CUSTOM_SCALAR_LAYOUT)

    def update(self, history, epoch) -> None:
        """
        Write data to the tensorboard log.
        :param history: A data structure that tracks the training history. It is indexed by the keys
        'accuracy', 'val_accuracy', 'loss', 'val_loss' and 'lr', each holding per-epoch values.
        :param epoch: An epoch number that can be used to look up a particular moment in the history.
        :return:
        """
        writer = self.summary_writer

        # Accuracy and loss are written the same way: the train value, the validation value, and
        # then both together under a 'combined' tag.
        for title, key in (('Accuracy', 'accuracy'), ('Loss', 'loss')):
            train_value = history[key][epoch]
            writer.add_scalar(f'{title}/train', train_value, epoch)
            val_value = history[f'val_{key}'][epoch]
            writer.add_scalar(f'{title}/val', val_value, epoch)
            writer.add_scalars(f'{title}/combined', {'train': train_value, 'val': val_value}, epoch)

        writer.add_scalar('Learning Rate', history['lr'][epoch], epoch)

    def close(self) -> None:
        """
        Closes the summary writer.
        :return:
        """
        self.summary_writer.close()
def convert_model(model_architecture, model_transforms, num_model_classes):
    """
    Constructs a model and runs the configured model transforms against it.
    :param model_architecture: The 'model_architecture' stanza describing the model to construct.
    :param model_transforms: The 'model_transforms' stanza listing the transforms to apply.
    :param num_model_classes: Number of model classes to use on construction.
    :return: Nothing. Any loading or saving must be done by the transforms themselves.
    """
    model = pyt_utils.construct_model(model_architecture, num_model_classes)

    # Apply model transforms.
    transforms = TransformManager(model_transforms)
    transforms.transform(model)


def main():
    """
    Script entry point: parses the command line, loads the config file, verifies that it has the
    'model_architecture' and 'model_transforms' stanzas, then constructs and transforms the model.
    Exits with status -1 if either stanza is missing.
    :return: Nothing.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    # Setup and parse all arguments.
    # NOTE: Adjacent string literals are concatenated as-is, so each fragment needs its own
    # trailing space for the help text to read correctly.
    parser = argparse.ArgumentParser(description="Constructs a model, applies transforms, and exits. "
                                                 "The config must be a subset of the training config and it "
                                                 "must contain 'model_architecture' and 'model_transforms' "
                                                 "stanzas. For loading weights and saving, include appropriate "
                                                 "transforms in the 'model_transforms' stanza, as this has no "
                                                 "inherent output.")

    parser.add_argument("config_path", help="Path to the config file with 'model_architecture' and 'model_transforms'.")
    parser.add_argument("num_model_classes", type=int, help="Number of model classes to use on construction.")

    args = parser.parse_args()

    # NOTE: We do NOT use the ModelConfig loader, because we do not require a full config at this time.
    config = jb_fs.load_file(args.config_path)

    # Both stanzas are mandatory; report the first one that is missing and stop.
    for stanza in ('model_architecture', 'model_transforms'):
        if stanza not in config:
            logger.error(f"Config does not have stanza '{stanza}'. EXITING.")
            sys.exit(-1)

    convert_model(config['model_architecture'], config['model_transforms'], args.num_model_classes)
41 | 42 | # Container layout 43 | 44 | The development process is based around the following lab layout: 45 | 46 | * /juneberry - Mount from the external user's directory 47 | * /datasets - Mount to the external data directories. 48 | * /tensorboard - Mount point for tensorboard output 49 | * /root/.cache/torch/hub - Mounted for model caches for PyTorch and MMDetection 50 | * /root/.torch - Mounted for model caches for Detectron2 51 | 52 | The containers set the dataroot and tensorboard environment variables automatically. The current working 53 | directory will be chosen as the workspace, unless specified otherwise. 54 | 55 | # Convenience Scripts 56 | 57 | In addition to the script for building images, there are also some convenience scripts here. 58 | 59 | ## enter_juneberry_container 60 | 61 | This script starts up a **temporary** 'cudadev' container on your host using all available gpus. 62 | It assumes a project directory structure that contains a set of special subdirectories where each 63 | subdirectory becomes a mount point within the container. This parent directory should be passed as the argument 64 | into enter_juneberry_container. 65 | 66 | The structure is: 67 | 68 | * juneberry <- This is the Juneberry repo that was pulled 69 | * datasets <- This is where the source data is located, i.e. the "dataroot" that Juneberry will look at. 70 | * tensorboard <- This is where the tensorboard outputs will be stored. 71 | * cache <- This is where the model downloads are cached. 72 | 73 | For example, if this structure was in the directory `~/proj` then to use the `enter_juneberry_container` 74 | change into `~/proj` and run: 75 | 76 | `./juneberry/docker/enter_juneberry_container .` 77 | 78 | See the comments within the script for how to configure it to use a cpu-only container, adjust environment 79 | variables, add other mount points and configure gpus.
80 | 81 | ## set_user.sh 82 | 83 | This optional convenience script can create a user inside the container to match an external 84 | user, resulting in the correct permissions for volumes mounted inside the container. See the script 85 | for an explanation of how it works in conjunction with enter_juneberry_container. 86 | 87 | # Copyright 88 | 89 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms. 90 | -------------------------------------------------------------------------------- /juneberry/metrics/objectdetection/brambox/format.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | # ====================================================================================================================== 4 | # Juneberry - Release 0.5 5 | # 6 | # Copyright 2022 Carnegie Mellon University. 7 | # 8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" 9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER 10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED 11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM 12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. 13 | # 14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms. 15 | # 16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see 17 | # Copyright notice for non-US Government use and distribution. 18 | # 19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license. 
class DefaultFormatter:
    """
    Reshapes the raw output of the brambox Coco, Tide and Summary metrics into the 'bbox',
    'bbox_per_class' and 'summary' sections of a Metrics object, returned as a dict.
    """

    # (output key, Coco metric key) pairs copied into 'bbox', in output order.
    _COCO_FIELDS = (
        ("mAP", "mAP_coco"),
        ("mAP_50", "mAP_50"),
        ("mAP_75", "mAP_75"),
        ("mAP_s", "mAP_small"),
        ("mAP_m", "mAP_medium"),
        ("mAP_l", "mAP_large"),
    )

    # Tide metric keys copied into 'bbox' under the same name.
    _TIDE_FIELDS = (
        "mdAP_localisation",
        "mdAP_classification",
        "mdAP_both",
        "mdAP_duplicate",
        "mdAP_background",
        "mdAP_missed",
        "mdAP_fp",
        "mdAP_fn",
    )

    # Summary metric keys copied into 'summary' under the same name.
    _SUMMARY_FIELDS = ("pr_auc", "pc_auc", "rc_auc", "max_r", "ap")

    # Keys read from the summary metric's 'prediction_types' entry.
    _PREDICTION_TYPE_FIELDS = ("tp", "fp", "fn")

    def __init__(self):
        pass

    def __call__(self, metrics: Dict):
        """
        :param metrics: Dict keyed by the fully qualified name of each metric class; the Coco, Tide
        and Summary entries are all required.
        :return: The populated Metrics object converted with to_dict().
        """
        coco = metrics["juneberry.metrics.objectdetection.brambox.metrics.Coco"]
        tide = metrics["juneberry.metrics.objectdetection.brambox.metrics.Tide"]
        summary = metrics["juneberry.metrics.objectdetection.brambox.metrics.Summary"]

        formatted = Metrics()
        formatted.bbox = {}
        formatted.bbox_per_class = {}
        formatted.summary = {}

        # Overall COCO values first, then the TIDE values, all under 'bbox'.
        for out_key, coco_key in self._COCO_FIELDS:
            formatted.bbox[out_key] = coco[coco_key]
        for tide_key in self._TIDE_FIELDS:
            formatted.bbox[tide_key] = tide[tide_key]

        # Every Coco entry that is not one of the overall "mAP*" values is reported per class.
        for coco_key, value in coco.items():
            if coco_key.startswith("mAP"):
                continue
            formatted.bbox_per_class["mAP_" + coco_key] = value

        for summary_key in self._SUMMARY_FIELDS:
            formatted.summary[summary_key] = summary[summary_key]
        for type_key in self._PREDICTION_TYPE_FIELDS:
            formatted.summary[type_key] = summary["prediction_types"][type_key]

        return formatted.to_dict()
class OnnxEvaluationOutput:
    """
    This is the default ONNX evaluation class used for formatting raw classification evaluation data
    in Juneberry.
    """

    def __call__(self, evaluator: Evaluator) -> None:
        """
        When called, this method uses the attributes of the evaluator to format the raw evaluation data.
        As a result of this process, the evaluator.output attribute will contain JSON-friendly data, which
        will then be written to a file.
        :param evaluator: The Evaluator object managing the evaluation.
        :return: Nothing.
        """

        # Perform the common eval output processing steps for a classifier.
        jb_eval_utils.prepare_classification_eval_output(evaluator)

        # Calculate the hash of the model that was used to conduct the evaluation.
        model_path = evaluator.model_manager.get_model_path(ONNXPlatformDefinitions())
        evaluated_model_hash = jb_fs.generate_file_hash(model_path)

        # If Juneberry trained the model, a hash would have been calculated after training.
        # Compare that hash (if it exists) to the hash of the model being evaluated.
        jb_eval_utils.verify_model_hash(evaluator, evaluated_model_hash, onnx=True)

        # If requested, get the top K classes predicted for each input.
        # NOTE: This reuses the helper from the PyTorch evaluation utilities.
        if evaluator.top_k:
            jb_pytorch_eval_utils.top_k_classifications(evaluator, evaluator.eval_dataset_config.label_names)

        # Save the predictions portion of the evaluation output to the appropriate file.
        logger.info(f"Saving predictions to {evaluator.eval_dir_mgr.get_predictions_path()}")
        evaluator.output_builder.save_predictions(evaluator.eval_dir_mgr.get_predictions_path())

        # Save the metrics portion of the evaluation output to the appropriate file.
        logger.info(f"Saving metrics to {evaluator.eval_dir_mgr.get_metrics_path()}")
        evaluator.output_builder.save_metrics(evaluator.eval_dir_mgr.get_metrics_path())
"string" }, 49 | "use_train_split": { "type": "boolean" }, 50 | "use_val_split": { "type": "boolean" } 51 | }, 52 | "required": [ "dataset_path", "tag" ] 53 | } 54 | }, 55 | "train": { "type": "boolean" }, 56 | "tuning": { "type": "string" }, 57 | "version": { "type": "string" } 58 | }, 59 | "required": [ "name", "tests" ] 60 | } 61 | }, 62 | "reports": { 63 | "type": "array", 64 | "items": { 65 | "allOf": [{ "$ref": "report_schema.json#/$defs/report"}], 66 | "properties": { 67 | "classes": { "type": "string"}, 68 | "tests": { 69 | "type": "array", 70 | "items": { 71 | "type": "object", 72 | "properties": { 73 | "tag": { "type": "string" }, 74 | "classes": { "type": "string" } 75 | }, 76 | "required": [ "tag" ] 77 | } 78 | } 79 | } 80 | } 81 | }, 82 | "timestamp": { "type": "string" }, 83 | "tuning": { 84 | "type": "array", 85 | "items": { 86 | "type": "object", 87 | "properties": { 88 | "model": { "type": "string" }, 89 | "tuning_config": { "type": "string" } 90 | }, 91 | "required": [ "model", "tuning_config" ] 92 | } 93 | } 94 | }, 95 | "required": [] 96 | } 97 | --------------------------------------------------------------------------------