├── test
├── config
│ ├── __init__.py
│ └── test_training_output.py
├── moddir
│ ├── __init__.py
│ └── simple_mod.py
├── metrics
│ ├── __init__.py
│ ├── classification
│ │ ├── __init__.py
│ │ ├── data
│ │ │ └── config_classification.json
│ │ └── test_classification_metrics.py
│ └── objectdetection
│ │ ├── __init__.py
│ │ └── data
│ │ ├── prc.csv
│ │ ├── fscore.csv
│ │ ├── ground_truth_no_annos.json
│ │ ├── config.json
│ │ └── default_metrics_with_formatter.json
├── mmdetection
│ └── __init__.py
├── pytorch
│ ├── __init__.py
│ └── test_utils.py
├── tf_test
│ └── __init__.py
├── test_logging.py
├── test_image.py
├── test_utils.py
├── test_transforms.py
├── test_tabular_data_set.py
└── test_coco_annotations.py
├── juneberry
├── config
│ ├── __init__.py
│ ├── plugin.py
│ ├── report.py
│ └── hashes.py
├── onnx
│ ├── __init__.py
│ ├── onnx_model_zoo
│ │ └── __init__.py
│ ├── utils.py
│ └── default.py
├── tuning
│ ├── __init__.py
│ └── reporter.py
├── evaluation
│ └── __init__.py
├── metrics
│ ├── __init__.py
│ ├── classification
│ │ ├── __init__.py
│ │ ├── sklearn
│ │ │ ├── __init__.py
│ │ │ └── metrics.py
│ │ ├── torchnn
│ │ │ ├── __init__.py
│ │ │ └── metrics.py
│ │ ├── tensorflow
│ │ │ └── __init__.py
│ │ ├── torchmetrics
│ │ │ ├── __init__.py
│ │ │ └── metrics.py
│ │ └── metrics.py
│ └── objectdetection
│ │ ├── __init__.py
│ │ └── brambox
│ │ ├── __init__.py
│ │ ├── utils.py
│ │ └── format.py
├── pytorch
│ ├── __init__.py
│ ├── torchvision.py
│ ├── privacy
│ │ └── model_transforms.py
│ └── tabular_dataset.py
├── reporting
│ ├── __init__.py
│ └── report.py
├── schemas
│ ├── __init__.py
│ ├── hashes_schema.json
│ ├── workspace_schema.json
│ ├── tuning_schema.json
│ ├── experiment_outline_schema.json
│ ├── rules_list_schema.json
│ ├── property_inference_attack_schema.json
│ ├── tuning_output_schema.json
│ ├── coco_anno_schema.json
│ ├── evaluation_output_schema.json
│ └── experiment_schema.json
├── scripting
│ ├── __init__.py
│ ├── tuning_sprout.py
│ ├── sprout.py
│ └── training_sprout.py
├── tensorflow
│ ├── __init__.py
│ ├── evaluation
│ │ └── __init__.py
│ └── utils.py
├── training
│ └── __init__.py
├── transforms
│ ├── __init__.py
│ ├── tabular.py
│ ├── random_shift.py
│ ├── random_crop_mirror.py
│ ├── random_mirror_flip.py
│ └── image.py
├── __init__.py
├── architectures
│ └── pytorch
│ │ └── torchvision.py
├── detectron2
│ ├── utils.py
│ └── transforms.py
├── platform.py
└── tensorboard.py
├── MANIFEST.in
├── docs
├── specs
│ ├── attack_configuration_specification.md
│ └── rules_list_specification.md
├── logo.png
├── zoo.md
├── software_maintenance.md
└── building_docker.md
├── docker
├── databricks
│ ├── gconf
│ │ ├── gmetad.conf
│ │ ├── databricks-gmond.conf
│ │ └── conf.d
│ │ │ └── modpython.conf
│ ├── spark-slave-not-active
│ ├── ganglia-monitor-not-active
│ ├── gmetad-not-active
│ ├── ganglia.conf
│ ├── build.sh
│ └── monit
├── container_start.sh
├── build.sh
├── set_user.sh
└── run_container_minimal.sh
├── .github
├── workflows
│ ├── docker-retag-cpudev.yml
│ ├── docker-retag-cudadev-latest.yml
│ ├── docker-retag-cudadev.yml
│ ├── docker-retag-cudabricks.yml
│ ├── docker-image-cpudev.yml
│ ├── docker-image-cudadev.yml
│ ├── docker-image-cudabricks-base.yml
│ └── docker-image-cudabricks.yml
└── ISSUE_TEMPLATE
│ ├── feature_request.md
│ └── bug_report.md
├── requirements-cpu.txt
├── requirements.txt
├── .gitignore
├── CONTRIBUTORS.md
├── scripts
├── setup_lab.py
├── dry_run_all_models.py
├── predictions_to_coco.py
├── merge_predictions.py
├── reformat_predictions.py
├── draw_boxes_from_anno_file.py
├── coco_image_use.py
└── model_transform.py
├── README.md
├── LICENSE.txt
├── setup.py
└── bin
├── jb_run_plugin
└── jb_clean_experiment_evals
/test/config/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/moddir/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/config/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/onnx/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/tuning/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/metrics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/mmdetection/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/tf_test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/reporting/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/schemas/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/scripting/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/training/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/transforms/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/onnx/onnx_model_zoo/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/tensorflow/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/metrics/classification/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/metrics/objectdetection/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/classification/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/objectdetection/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include juneberry/schemas *
2 |
--------------------------------------------------------------------------------
/docs/specs/attack_configuration_specification.md:
--------------------------------------------------------------------------------
1 | TODO
--------------------------------------------------------------------------------
/juneberry/metrics/classification/sklearn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/classification/torchnn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/objectdetection/brambox/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/classification/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/juneberry/metrics/classification/torchmetrics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-sei/juneberry/HEAD/docs/logo.png
--------------------------------------------------------------------------------
/juneberry/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | __version__ = "0.5.1"
4 |
--------------------------------------------------------------------------------
/docker/databricks/gconf/gmetad.conf:
--------------------------------------------------------------------------------
1 | data_source "cluster" localhost
2 | setuid_username "ganglia"
3 | xml_port 8651
--------------------------------------------------------------------------------
/juneberry/schemas/hashes_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "description": "A configuration storing model hashes",
4 | "properties": {
5 | "model_architecture": { "type" : "string" }
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/docker/databricks/spark-slave-not-active:
--------------------------------------------------------------------------------
1 | \n\
2 | check process spark-slave with pidfile /tmp/spark-root-org.apache.spark.deploy.worker.Worker-1.pid\n\
3 | start program = "/databricks/spark/scripts/restart-workers"\n\
4 | stop program = "/databricks/spark/scripts/kill_worker.sh"\n\
5 |
--------------------------------------------------------------------------------
/test/metrics/objectdetection/data/prc.csv:
--------------------------------------------------------------------------------
1 | precision,recall,confidence
2 | 0.3333333333333333,0.06666666666666667,0.9
3 | 0.6,0.2,0.8
4 | 0.6666666666666666,0.26666666666666666,0.75
5 | 0.625,0.3333333333333333,0.7
6 | 0.6,0.4,0.6
7 | 0.4666666666666667,0.4666666666666667,0.5
8 | 0.3684210526315789,0.4666666666666667,0.4
9 |
--------------------------------------------------------------------------------
/docker/databricks/ganglia-monitor-not-active:
--------------------------------------------------------------------------------
1 | \n\
2 | check process ganglia-monitor with pidfile /var/run/ganglia-monitor.pid\n\
3 | start program = "/usr/sbin/service ganglia-monitor start"\n\
4 | stop program = "/usr/sbin/service ganglia-monitor stop"\n\
5 | if memory usage > 500 MB for 3 cycles then restart\n\
6 |
--------------------------------------------------------------------------------
/test/metrics/objectdetection/data/fscore.csv:
--------------------------------------------------------------------------------
1 | f1,recall,confidence
2 | 0.1111111111111111,0.06666666666666667,0.9
3 | 0.3,0.2,0.8
4 | 0.3809523809523809,0.26666666666666666,0.75
5 | 0.43478260869565216,0.3333333333333333,0.7
6 | 0.48,0.4,0.6
7 | 0.4666666666666667,0.4666666666666667,0.5
8 | 0.4117647058823529,0.4666666666666667,0.4
9 |
--------------------------------------------------------------------------------
/docker/container_start.sh:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # Install Juneberry itself in editable mode from the /juneberry directory
4 | echo "Installing Juneberry..."
5 | pip install -e /juneberry
6 |
7 | # Source the Juneberry bash completion script
8 | source /juneberry/scripts/juneberry_completion.sh
9 |
10 | # If the current directory has a setup.py, install it (editable) as workspace code
11 | if [ -e "./setup.py" ]; then
12 | echo "Installing workspace..."
13 | pip install -e .
14 | fi
15 |
16 |
--------------------------------------------------------------------------------
/docker/databricks/gconf/databricks-gmond.conf:
--------------------------------------------------------------------------------
1 | cluster {
2 | name = "cluster"
3 | owner = "unspecified"
4 | latlong = "unspecified"
5 | url = "unspecified"
6 | }
7 |
8 | udp_send_channel {
9 | host = 10.126.246.34
10 | port = 8649
11 | ttl = 1
12 | }
13 |
14 | /* This is set for gmond running on the driver to receive metrics from other gmonds, and is not
15 | in use for gmonds running on workers. */
16 | udp_recv_channel {
17 | port = 8649
18 | }
--------------------------------------------------------------------------------
/docker/databricks/gconf/conf.d/modpython.conf:
--------------------------------------------------------------------------------
1 | /*
2 | params - path to the directory where mod_python
3 | should look for python metric modules
4 |
5 | the "pyconf" files in the include directory below
6 | will be scanned for configurations for those modules
7 | */
8 | modules {
9 | module {
10 | name = "python_module"
11 | path = "/usr/lib/ganglia/modpython.so"
12 | params = "/usr/lib/ganglia/python_modules"
13 | }
14 | }
15 |
16 | include ('/etc/ganglia/conf.d/*.pyconf')
17 |
--------------------------------------------------------------------------------
/docker/databricks/gmetad-not-active:
--------------------------------------------------------------------------------
1 | \n\
2 | check process gmetad with pidfile /var/run/gmetad.pid\n\
3 | start program = "/usr/sbin/service gmetad start"\n\
4 | stop program = "/usr/sbin/service gmetad stop"\n\
5 | if memory usage > 500 MB for 3 cycles then restart\n\
6 | \n\
7 | check process apache2 with pidfile /var/run/apache2/apache2.pid\n\
8 | start program = "/usr/sbin/service apache2 start"\n\
9 | stop program = "/usr/sbin/service apache2 stop"\n\
10 | if memory usage > 500 MB for 3 cycles then restart\n\
11 |
--------------------------------------------------------------------------------
/.github/workflows/docker-retag-cpudev.yml:
--------------------------------------------------------------------------------
1 | name: Retag cpudev-dev to cpudev
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 |
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | -
14 | name: Retag cpudev-dev to cpudev
15 | uses: tinact/docker.image-retag@master
16 | with:
17 | image_name: cmusei/juneberry
18 | image_old_tag: cpudev-dev
19 | image_new_tag: cpudev
20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }}
21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }}
22 |
--------------------------------------------------------------------------------
/.github/workflows/docker-retag-cudadev-latest.yml:
--------------------------------------------------------------------------------
1 | name: Retag cudadev to latest
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 |
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | -
14 | name: Retag cudadev to latest
15 | uses: tinact/docker.image-retag@master
16 | with:
17 | image_name: cmusei/juneberry
18 | image_old_tag: cudadev
19 | image_new_tag: latest
20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }}
21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }}
22 |
--------------------------------------------------------------------------------
/.github/workflows/docker-retag-cudadev.yml:
--------------------------------------------------------------------------------
1 | name: Retag cudadev-dev to cudadev
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 |
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | -
14 | name: Retag cudadev-dev to cudadev
15 | uses: tinact/docker.image-retag@master
16 | with:
17 | image_name: cmusei/juneberry
18 | image_old_tag: cudadev-dev
19 | image_new_tag: cudadev
20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }}
21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }}
22 |
--------------------------------------------------------------------------------
/requirements-cpu.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.7.0
2 | tensorflow-datasets==4.4.0
3 | doit==0.33.1
4 | numpy==1.19.5
5 | pycocotools==2.0.2
6 | matplotlib==3.4.3
7 | Pillow==8.3.2
8 | prodict==0.8.18
9 | hjson==3.0.2
10 | jsonschema==4.1.0
11 | scikit-learn
12 | tensorboard==2.6.0
13 | torch==1.8.0
14 | torchvision==0.9.0
15 | torch-summary==1.4.5
16 | pandas==1.3.3
17 | brambox==3.2.0
18 | PyYAML==5.4.1
19 | natsort==7.1.1
20 | opacus==0.14.0
21 | protobuf==3.16.0
22 | onnx==1.10.1
23 | onnxruntime==1.10.0
24 | tf2onnx==1.9.3
25 | opencv-python==4.5.5.62
26 | tqdm==4.62.3
27 | pytest==6.2.5
28 | pylint==2.10.2
29 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.7.0
2 | tensorflow-datasets==4.4.0
3 | doit==0.33.1
4 | numpy==1.19.5
5 | pycocotools==2.0.2
6 | matplotlib==3.4.3
7 | Pillow==8.3.2
8 | prodict==0.8.18
9 | hjson==3.0.2
10 | jsonschema==4.1.0
11 | scikit-learn
12 | tensorboard==2.6.0
13 | torch==1.8.0
14 | torchvision==0.9.0
15 | torch-summary==1.4.5
16 | pandas==1.3.3
17 | brambox==3.2.0
18 | PyYAML==5.4.1
19 | natsort==7.1.1
20 | opacus==0.14.0
21 | protobuf==3.16.0
22 | onnx==1.10.1
23 | onnxruntime-gpu==1.10.0
24 | tf2onnx==1.9.3
25 | opencv-python==4.5.5.62
26 | tqdm==4.62.3
27 | pytest==6.2.5
28 | pylint==2.10.2
29 |
--------------------------------------------------------------------------------
/.github/workflows/docker-retag-cudabricks.yml:
--------------------------------------------------------------------------------
1 | name: Retag cudabricks-dev to cudabricks
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 |
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | -
14 | name: Retag cudabricks-dev to cudabricks
15 | uses: tinact/docker.image-retag@master
16 | with:
17 | image_name: cmusei/juneberry
18 | image_old_tag: cudabricks-dev
19 | image_new_tag: cudabricks
20 | registry_username: ${{ secrets.DOCKERHUB_USERNAME }}
21 | registry_password: ${{ secrets.DOCKERHUB_PASSWORD }}
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.h5
2 | *.log
3 | *.png
4 | *.pyc
5 | *.pt
6 | *.swp
7 | .DS_Store
8 | .idea/
9 | .vscode/
10 | .venv/
11 | .dvc/
12 | Juneberry.egg-info
13 | .coverage
14 | htmlcov/
15 |
16 | # Ignore models except the config and DVC files
17 | models/*/*
18 | !models/**/config.json
19 | !models/**/*.dvc
20 |
21 | experiments/*/*
22 | !experiments/**/*.json
23 | !experiments/**/*.dvc
24 | experiments/**/rules.json
25 |
26 |
27 | # The checkpoints directory will contain checkpoints from the container
28 | checkpoints/
29 |
30 | # Ignore some tool support directories
31 | scripts/resnet-summaries/latest
32 |
33 | # A place to hide things
34 | nothingtoseehere
35 |
36 | # We don't want doit stuff
37 | **/.doit.db*
38 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is. Here are some of the things we'd like to know.
12 |
13 | **To Reproduce**
14 | - Which tool (train, evaluate)
15 | - What model architecture? (Add config file?)
16 | - What dataset? (Add config file?)
17 |
18 | **Expected behavior**
19 | A clear and concise description of what you expected to happen.
20 |
21 | **Screenshots or log output**
22 | If applicable, add screenshots to help explain your problem.
23 |
24 | **Platform**
25 | - Which container version?
26 | - Does it happen in CPU or GPU mode only? If GPU how many GPUs?
27 |
28 | **Additional context**
29 | Add any other context about the problem here.
30 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | # Version 0.4 Contributors
2 |
3 | * Andrew Mellinger - SEI/CMU
4 | * Annika Horgan - SEI/CMU
5 | * Anusha Sinha - SEI/CMU
6 | * Jay Palat - SEI/CMU
7 | * Matthew Churilla - SEI/CMU
8 | * Michael Vincente - SEI/CMU
9 | * Nathan VanHoudnos - SEI/CMU
10 | * Nick Winski - SEI/CMU
11 | * Robert Beveridge - SEI/CMU
12 | * Todd Loizes - SEI/CMU
13 | * Violet Turri - SEI/CMU
14 | * William R. Nichols - SEI/CMU
15 |
16 | # Version 0.5 Contributors
17 |
18 | * Andrew Mellinger - SEI/CMU
19 | * Anusha Sinha - SEI/CMU
20 | * Bill Shaw - SEI/CMU
21 | * Bryan Brown - SEI/CMU
22 | * Daniel Justice - SEI/CMU
23 | * Dominique Mittermeier - SEI/CMU
24 | * Hayden Moore - SEI/CMU
25 | * John Zucca - SEI/CMU
26 | * Jon Helland - SEI/CMU
27 | * Jordan Widjaja - SEI/CMU
28 | * Matthew Churilla - SEI/CMU
29 | * Nathan VanHoudnos - SEI/CMU
30 | * Nick Winski - SEI/CMU
31 | * Shannon Gallagher - SEI/CMU
32 | * William R. Nichols - SEI/CMU
33 |
--------------------------------------------------------------------------------
/test/metrics/objectdetection/data/ground_truth_no_annos.json:
--------------------------------------------------------------------------------
1 | {
2 | "annotations": [
3 | ],
4 | "images": [
5 | {
6 | "file_name": "img_1.jpg",
7 | "coco_url": "",
8 | "height": 200,
9 | "width": 200,
10 | "id": "1"
11 | },
12 | {
13 | "file_name": "img_2.jpg",
14 | "coco_url": "",
15 | "height": 200,
16 | "width": 200,
17 | "id": "2"
18 | }
19 | ],
20 | "categories": [
21 | {
22 | "supercategory": "sc_1",
23 | "id": 1,
24 | "name": "class_1"
25 | },
26 | {
27 | "supercategory": "sc_1",
28 | "id": 2,
29 | "name": "class_2"
30 | }
31 | ],
32 | "info": {
33 | "description": "Metrics Unit Test (no annos)",
34 | "year": 2021,
35 | "date_created": "2021/11/11"
36 | }
37 | }
--------------------------------------------------------------------------------
/docker/build.sh:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # Builds <name>.Dockerfile (located next to this script) into the image
4 | # juneberry/<name>:<tag>, using this script's directory as the build context.
5 | #   $1 - the part BEFORE '.Dockerfile', e.g. 'cudadev' or 'cpudev' (required)
6 | #   $2 - docker tag (optional, defaults to 'dev')
7 |
8 | if [ $# -lt 1 ]; then
9 |     echo "This script requires one argument, the part BEFORE the '.Dockerfile'"
10 |     echo "e.g. 'cudadev' or 'cpudev'"
11 |     echo "Optionally a docker tag can be provided as a second argument otherwise 'dev' is used."
12 |     # Exit statuses must be in 0-255; 'exit -1' is not portable.
13 |     exit 1
14 | fi
15 |
16 | # Use the second argument as the tag whenever it is present; fall back to 'dev'.
17 | REV="${2:-dev}"
18 |
19 | TARGET_TAG="juneberry/${1}:${REV}"
20 | SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
21 | DOCKERFILE="${SCRIPT_DIR}/${1}.Dockerfile"
22 |
23 | echo "Building: ${DOCKERFILE} into ${TARGET_TAG}"
24 |
25 | # Forward the caller's proxy settings into the build. Expansions are quoted so
26 | # values and paths containing spaces are passed as single arguments.
27 | docker build --no-cache \
28 |     --build-arg HTTP_PROXY="${HTTP_PROXY}" \
29 |     --build-arg http_proxy="${http_proxy}" \
30 |     --build-arg HTTPS_PROXY="${HTTPS_PROXY}" \
31 |     --build-arg https_proxy="${https_proxy}" \
32 |     --build-arg NO_PROXY="${NO_PROXY}" \
33 |     --build-arg no_proxy="${no_proxy}" \
34 |     --network=host -f "${DOCKERFILE}" -t "${TARGET_TAG}" "${SCRIPT_DIR}"
35 |
--------------------------------------------------------------------------------
/test/metrics/objectdetection/data/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "evaluation_metrics": [
3 | {
4 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Summary",
5 | "kwargs": {
6 | "iou_threshold": 0.5,
7 | "tp_threshold": 0.8
8 | }
9 | },
10 | {
11 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Coco",
12 | "kwargs": {
13 | "iou_threshold": 0.5,
14 | "max_det": 100,
15 | "tqdm": false
16 | }
17 | },
18 | {
19 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Tide",
20 | "kwargs": {
21 | "pos_thresh": 0.5,
22 | "bg_thresh": 0.5,
23 | "max_det": 100,
24 | "area_range_min": 0,
25 | "area_range_max": 100000,
26 | "tqdm": false
27 | }
28 | }
29 | ]
30 | }
--------------------------------------------------------------------------------
/juneberry/schemas/workspace_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "description": "A configuration for the workspace",
4 | "properties": {
5 | "profiles": {
6 | "type": "array",
7 | "items": {
8 | "type": "object",
9 | "properties": {
10 | "include": {"type": "string"},
11 | "model": {"type": "string"},
12 | "name": {"type": "string"},
13 | "profile": {
14 | "type": "object",
15 | "properties": {
16 | "max_gpus": {"type": "integer"},
17 | "no_paging": {"type": "boolean"},
18 | "num_gpus": {"type": "integer"},
19 | "num_workers": {"type": "integer"}
20 | }
21 | }
22 | },
23 | "required": ["model", "name", "profile"]
24 | }
25 | }
26 | }
27 | }
--------------------------------------------------------------------------------
/docker/databricks/ganglia.conf:
--------------------------------------------------------------------------------
1 |
2 | ServerAdmin webmaster@localhost
3 |
4 | # Show Ganglia UI by default
5 | DocumentRoot /usr/share/ganglia-webfrontend
6 |
7 | # Available loglevels: trace8, ..., trace1, debug, info, notice, warn,
8 | # error, crit, alert, emerg.
9 | # It is also possible to configure the loglevel for particular
10 | # modules, e.g.
11 | #LogLevel info ssl:warn
12 |
13 | ErrorLog ${APACHE_LOG_DIR}/error.log
14 | CustomLog ${APACHE_LOG_DIR}/access.log combined
15 |
16 | # For most configuration files from conf-available/, which are
17 | # enabled or disabled at a global level, it is possible to
18 | # include a line for only one particular virtual host. For example the
19 | # following line enables the CGI configuration for this host only
20 | # after it has been globally disabled with "a2disconf".
21 | #Include conf-available/serve-cgi-bin.conf
22 |
23 |
24 | # vim: syntax=apache ts=4 sw=4 sts=4 sr noet
25 |
--------------------------------------------------------------------------------
/docker/databricks/build.sh:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | REV="dev"
4 | if [ $# -eq 1 ]; then
5 | REV=${1}
6 | fi
7 |
8 | # SCRIPT DIR is our context dir.
9 | SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
10 |
11 | TARGET_TAG="juneberry/cudabricks-base:${REV}"
12 |
13 | echo "Building: Base into ${TARGET_TAG}"
14 |
15 | docker build --no-cache \
16 | --build-arg HTTP_PROXY=${HTTP_PROXY} \
17 | --build-arg http_proxy=${http_proxy} \
18 | --build-arg HTTPS_PROXY=${HTTPS_PROXY} \
19 | --build-arg https_proxy=${https_proxy} \
20 | --build-arg NO_PROXY=${NO_PROXY} \
21 | --build-arg no_proxy=${no_proxy} \
22 | --network=host -f base.Dockerfile -t ${TARGET_TAG} ${SCRIPT_DIR}
23 |
24 | TARGET_TAG="juneberry/cudabricks:${REV}"
25 |
26 | echo "Building: Ganglia into ${TARGET_TAG}"
27 |
28 | docker build --no-cache \
29 | --build-arg HTTP_PROXY=${HTTP_PROXY} \
30 | --build-arg http_proxy=${http_proxy} \
31 | --build-arg HTTPS_PROXY=${HTTPS_PROXY} \
32 | --build-arg https_proxy=${https_proxy} \
33 | --build-arg NO_PROXY=${NO_PROXY} \
34 | --build-arg no_proxy=${no_proxy} \
35 | --network=host -f ganglia.Dockerfile -t ${TARGET_TAG} ${SCRIPT_DIR}
36 |
--------------------------------------------------------------------------------
/test/metrics/objectdetection/data/default_metrics_with_formatter.json:
--------------------------------------------------------------------------------
1 | {
2 | "evaluation_metrics": [
3 | {
4 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Coco",
5 | "kwargs": {
6 | "iou_threshold": 0.5,
7 | "max_det": 100,
8 | "tqdm": false
9 | }
10 | },
11 | {
12 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Summary",
13 | "kwargs": {
14 | "iou_threshold": 0.5,
15 | "tp_threshold": 0.8
16 | }
17 | },
18 | {
19 | "fqcn": "juneberry.metrics.objectdetection.brambox.metrics.Tide",
20 | "kwargs": {
21 | "pos_thresh": 0.5,
22 | "bg_thresh": 0.5,
23 | "max_det": 100,
24 | "area_range_min": 0,
25 | "area_range_max": 100000,
26 | "tqdm": false
27 | }
28 | }
29 | ],
30 | "evaluation_metrics_formatter": {
31 | "fqcn": "juneberry.metrics.objectdetection.brambox.format.DefaultFormatter",
32 | "kwargs": {
33 | }
34 | }
35 | }
--------------------------------------------------------------------------------
/.github/workflows/docker-image-cpudev.yml:
--------------------------------------------------------------------------------
1 | name: Docker Image CI CPUDev
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 |
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | -
14 | name: Check base machine storage
15 | run: df -h /
16 | -
17 | name: Delete unused packages and tools
18 | run: sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell &&
19 | sudo apt-get autoremove -y &&
20 | sudo apt-get clean &&
21 | rm -rf /usr/share/dotnet/ &&
22 | rm -rf /opt/hostedtoolcache
23 | -
24 | name: Check base machine storage (after delete)
25 | run: df -h /
26 | -
27 | name: Checkout
28 | uses: actions/checkout@v2
29 | -
30 | name: Login to DockerHub
31 | uses: docker/login-action@v1
32 | with:
33 | username: ${{ secrets.DOCKERHUB_USERNAME }}
34 | password: ${{ secrets.DOCKERHUB_PASSWORD }}
35 | -
36 | name: Build and push
37 | uses: docker/build-push-action@v2
38 | with:
39 | context: ./docker
40 | push: true
41 | file: docker/cpudev.Dockerfile
42 | tags: cmusei/juneberry:cpudev-dev
43 |
--------------------------------------------------------------------------------
/.github/workflows/docker-image-cudadev.yml:
--------------------------------------------------------------------------------
# Manually triggered workflow that builds the CUDA development image and pushes it to DockerHub.
name: Docker Image CI CUDADev

on:
  workflow_dispatch:

jobs:

  build:

    runs-on: ubuntu-latest

    steps:
      -
        name: Check base machine storage
        run: df -h /
      -
        name: Delete unused packages and tools
        # A literal block keeps every command on its own line. The previous multi-line plain scalar
        # was folded into a single line, which turned the second "rm -rf" into extra operands of
        # the first one. Both directories are now passed to a single rm explicitly.
        run: |
          sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell &&
          sudo apt-get autoremove -y &&
          sudo apt-get clean &&
          rm -rf /usr/share/dotnet/ /opt/hostedtoolcache
      -
        name: Check base machine storage (after delete)
        run: df -h /
      -
        name: Checkout
        uses: actions/checkout@v2
      -
        name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v2
        with:
          context: ./docker
          push: true
          file: docker/cudadev.Dockerfile
          tags: cmusei/juneberry:cudadev-dev
43 |
--------------------------------------------------------------------------------
/.github/workflows/docker-image-cudabricks-base.yml:
--------------------------------------------------------------------------------
# Manually triggered workflow that builds the Databricks base image and pushes it to DockerHub.
name: Docker Image CI CUDABricksBase

on:
  workflow_dispatch:

jobs:

  build:

    runs-on: ubuntu-latest

    steps:
      -
        name: Check base machine storage
        run: df -h /
      -
        name: Delete unused packages and tools
        # A literal block keeps every command on its own line. The previous multi-line plain scalar
        # was folded into a single line, which turned the second "rm -rf" into extra operands of
        # the first one. Both directories are now passed to a single rm explicitly.
        run: |
          sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell &&
          sudo apt-get autoremove -y &&
          sudo apt-get clean &&
          rm -rf /usr/share/dotnet/ /opt/hostedtoolcache
      -
        name: Check base machine storage (after delete)
        run: df -h /
      -
        name: Checkout
        uses: actions/checkout@v2
      -
        name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v2
        with:
          context: ./docker/databricks
          push: true
          file: docker/databricks/base.Dockerfile
          tags: cmusei/juneberry:cudabricks-base
43 |
--------------------------------------------------------------------------------
/.github/workflows/docker-image-cudabricks.yml:
--------------------------------------------------------------------------------
# Manually triggered workflow that builds the Databricks (ganglia) image and pushes it to DockerHub.
name: Docker Image CI CUDABricks

on:
  workflow_dispatch:

jobs:

  build:

    runs-on: ubuntu-latest

    steps:
      -
        name: Check base machine storage
        run: df -h /
      -
        name: Delete unused packages and tools
        # A literal block keeps every command on its own line. The previous multi-line plain scalar
        # was folded into a single line, which turned the second "rm -rf" into extra operands of
        # the first one. Both directories are now passed to a single rm explicitly.
        run: |
          sudo apt-get remove -y azure-cli google-cloud-sdk google-chrome-stable firefox powershell &&
          sudo apt-get autoremove -y &&
          sudo apt-get clean &&
          rm -rf /usr/share/dotnet/ /opt/hostedtoolcache
      -
        name: Check base machine storage (after delete)
        run: df -h /
      -
        name: Checkout
        uses: actions/checkout@v2
      -
        name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
        name: Build and push
        uses: docker/build-push-action@v2
        with:
          context: ./docker/databricks
          push: true
          file: docker/databricks/ganglia.Dockerfile
          tags: cmusei/juneberry:cudabricks-dev
43 |
--------------------------------------------------------------------------------
/juneberry/config/plugin.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | from prodict import Prodict
26 |
27 |
class Plugin(Prodict):
    """
    Typed Prodict describing a single plugin entry in a config file.

    Declares two fields: 'fqcn', a string, and 'kwargs', a nested Prodict.
    """
    fqcn: str
    kwargs: Prodict
31 |
--------------------------------------------------------------------------------
/juneberry/onnx/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | from juneberry.platform import PlatformDefinitions
26 |
27 |
class ONNXPlatformDefinitions(PlatformDefinitions):
    """
    Platform definitions specialized for the ONNX platform.
    """

    def get_model_filename(self):
        """
        Provide the file name used for the model file.
        :return: The name of the model file that the trainer saves and what evaluators should load.
        """
        model_filename = "model.onnx"
        return model_filename
32 |
--------------------------------------------------------------------------------
/docker/set_user.sh:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # This convenience script is designed to be executed INSIDE the juneberry container
4 | # to create a user and group with IDs that work well with the mounted volumes. This script
5 | # requires that various user information is passed into the container. This user information
6 | # is:
7 | # - user name as USER_NAME
8 | # - user id as USER_ID
9 | # - user group id as USER_GID
10 | #
11 | # For example, using these options to docker run:
12 | #
13 | # -e USER_NAME=${USER} -e USER_ID=`id -u ${USER}` -e USER_GID=`id -g ${USER}`
14 | #
15 | # Or via this line in enter_juneberry_container:
16 | #
17 | # ENVS_USER="-e USER_NAME=${USER} -e USER_ID=`id -u ${USER}` -e USER_GID=`id -g ${USER}`"
18 | #
19 | # As this is all temporary, this script needs to be executed on every new container instantiation.
20 |
# Add the group and user if all three of these are set.
# Every expansion is quoted so that unusual values cannot be word-split or glob-expanded.
if test -n "${USER_NAME}" && test -n "${USER_ID}" && test -n "${USER_GID}"; then
    groupadd -g "${USER_GID}" domain_users
    useradd -m -s /bin/bash -u "${USER_ID}" -g "${USER_GID}" -G root "${USER_NAME}"

    # Add the default path to the bashrc. In the case of the nvidia containers, they have
    # manually set the path via docker file not in a global bashrc or profile,
    # so we have no way to source that in our bashrc.
    echo "export PATH=${PATH}" >> "/home/${USER_NAME}/.bashrc"

    # Provide sudo access to mimic a normal system where the user has sudo access.
    echo "${USER_NAME} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

    # Now, set us to that user.
    su "${USER_NAME}"
else
    echo "Not setting user because USER_NAME, USER_ID, or USER_GID not set."
fi
39 |
--------------------------------------------------------------------------------
/test/metrics/classification/data/config_classification.json:
--------------------------------------------------------------------------------
1 | {
2 | "metrics": [
3 | {
4 | "fqcn": "juneberry.metrics.classification.sklearn.metrics.Metrics",
5 | "kwargs": {
6 | "fqn": "sklearn.metrics.accuracy_score",
7 | "name": "accuracy_score",
8 | "kwargs": {
9 | "normalize": false
10 | }
11 | }
12 | },
13 | {
14 | "fqcn": "juneberry.metrics.classification.torchmetrics.metrics.Metrics",
15 | "kwargs": {
16 | "fqn": "torchmetrics.functional.accuracy",
17 | "name": "func_accuracy",
18 | "kwargs": {
19 | "top_k": 2
20 | }
21 | }
22 | },
23 | {
24 | "fqcn": "juneberry.metrics.classification.torchmetrics.metrics.Metrics",
25 | "kwargs": {
26 | "fqn": "torchmetrics.Accuracy",
27 | "name": "obj_accuracy",
28 | "kwargs": {
29 | "top_k": 2
30 | }
31 | }
32 | },
33 | {
34 | "fqcn": "juneberry.metrics.classification.torchnn.metrics.Metrics",
35 | "kwargs": {
36 | "fqn": "torch.nn.CrossEntropyLoss",
37 | "name": "loss",
38 | "kwargs": {}
39 | }
40 | },
41 | {
42 | "fqcn": "juneberry.metrics.classification.tensorflow.metrics.Metrics",
43 | "kwargs": {
44 | "fqn": "tensorflow.keras.metrics.Accuracy",
45 | "name": "tf_accuracy",
46 | "kwargs": {
47 | "standalone": true
48 | }
49 | }
50 | },
51 | {
52 | "fqcn": "juneberry.metrics.classification.tensorflow.metrics.Metrics",
53 | "kwargs": {
54 | "fqn": "tensorflow.keras.metrics.binary_accuracy",
55 | "name": "tf_binary_accuracy",
56 | "kwargs": {
57 | "standalone": true
58 | }
59 | }
60 | }
61 | ]
62 | }
--------------------------------------------------------------------------------
/juneberry/pytorch/torchvision.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import importlib
26 |
27 |
class Wrapper:
    """
    Thin factory around the classes exposed by 'torchvision.models'. The class to build is
    selected by name and constructed with the supplied keyword arguments.
    """

    def __call__(self, class_name, class_args, num_classes):
        """
        Look up a class in 'torchvision.models' and instantiate it.
        :param class_name: Name of the attribute to fetch from 'torchvision.models'.
        :param class_args: Dictionary that is expanded into keyword arguments for the constructor.
        :param num_classes: Accepted as part of the call signature; not used by this wrapper.
        :return: The object produced by calling the selected class with class_args.
        """
        model_factory = getattr(importlib.import_module('torchvision.models'), class_name)
        return model_factory(**class_args)
39 |
--------------------------------------------------------------------------------
/docs/zoo.md:
--------------------------------------------------------------------------------
1 | Model Zoo Overview
2 | ==========
3 |
4 | # Introduction
5 |
6 | Juneberry supports the idea of a model zoo, which contains config files and pre-trained models that
7 | can be downloaded and used in Juneberry. Model zoo files are stored on remote servers in a hierarchy
8 | similar to how the 'models' directory is organized. Consider the following path to a zoo file:
9 |
10 | "https://juneberry.com/models/my-model/resnet18.zip"
11 |
12 | - https://juneberry.com/models - The base url of the server where the models are stored.
13 | - my-model/resnet18.zip - A zip containing the model named "my-model/resnet18".
14 |
15 | # Packaging a model
16 |
17 | The model zip file contains any necessary files required to share the model. These typically include the
18 | following:
19 |
20 | - config.json
21 | - model.pt or model.h5
22 | - (optional) hashes.json
23 |
24 | When provided, the hashes.json file can confirm which model architecture was used to generate
25 | the model. If the model_architecture hash embedded inside the hashes.json does NOT match the hash of
26 | the locally constructed model architecture summary, then the model will not be loaded from the zoo and an
27 | error is generated. During training, a "hashes-latest.json" file will be produced which contains the
28 | model_architecture hash that was used to train the model.
29 |
30 | A convenience tool is provided which packages up the zip file. To invoke the tool, specify the model and
31 | a directory representing a staging area for zip files to be uploaded to the zoo. The tool expects to be run from
32 | the root of the workspace. Consider the following command:
33 |
34 | `python -m juneberry.zoo my-model/resnet18 ./zoo-staging`
35 |
36 | This command would create the file "./zoo-staging/my-model/resnet18.zip" containing the model's config file, the
37 | model.pt (assuming it is a PyTorch model), and a hashes.json file (if one exists) or a copy
38 | of "hashes-latest.json", if one exists, renamed to "hashes.json".
--------------------------------------------------------------------------------
/juneberry/transforms/tabular.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | Simple transforms for tabular data.
27 | """
28 |
29 |
class RemoveColumns:
    """
    Transform that removes a fixed set of positional columns from a row.
    """

    def __init__(self, indexes: list):
        """
        :param indexes: Positions of the columns to remove. Any iterable of integers is accepted and
        the argument itself is left untouched. Repeated positions are collapsed so that each column is
        removed at most once.
        """
        # Positions are deleted from highest to lowest so that a deletion never shifts a position that
        # is still pending. Duplicates must be dropped first: deleting the same position twice would
        # also remove the unrelated column that slid into its place.
        self.reverse_indexes = sorted(set(indexes), reverse=True)

    def __call__(self, row):
        """
        Transformation function that removes the configured columns from the row.
        :param row: The source row. It must support copy() and deletion by position; it is not modified.
        :return: A copy of the row without the configured columns.
        """
        # TODO: Probably a better way to do this in pandas or something else via a "view" wrapper and just
        #  remap indices.
        row_copy = row.copy()
        for idx in self.reverse_indexes:
            del row_copy[idx]
        return row_copy
45 |
--------------------------------------------------------------------------------
/juneberry/pytorch/privacy/model_transforms.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 |
27 | from opacus.dp_model_inspector import DPModelInspector
28 | from opacus.utils import module_modification
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 |
class ConvertBatchnormModules:
    """
    Transform used to convert batchnorm modules for use in opacus.
    """

    def __call__(self, model):
        """
        Convert the batchnorm modules of the model and log whether the result validates.
        :param model: The model to convert.
        :return: The model returned by opacus' convert_batchnorm_modules.
        """
        logger.info("Attempting conversion of batchnorm modules.")
        converted = module_modification.convert_batchnorm_modules(model)

        # The validation result is only reported in the log; it does not change what is returned.
        is_valid = DPModelInspector().validate(converted)
        logger.info(f"... Is converted model valid for DPSGD?: {is_valid}")

        return converted
45 |
--------------------------------------------------------------------------------
/juneberry/tuning/reporter.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 | from typing import Dict, List
27 |
28 | from ray.tune import CLIReporter
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 |
class CustomReporter(CLIReporter):
    """
    Reporter that routes Ray Tune status output through the logging module. By default, Ray Tune
    just prints its status messages. Sending them to a logger instead means a record of each message
    will be maintained inside Juneberry log files.
    """

    def report(self, trials: List, done: bool, *sys_info: Dict):
        """
        Build the progress string for the given trials and emit it as an info-level log message.
        :param trials: The trials to report on; forwarded to _progress_str.
        :param done: Forwarded to _progress_str.
        :param sys_info: Additional positional values forwarded to _progress_str.
        :return: None
        """
        progress = self._progress_str(trials, done, *sys_info)
        logger.info(f"Status Message from Ray Tune:\n{progress}")
42 |
--------------------------------------------------------------------------------
/juneberry/schemas/tuning_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "$id": "tuning_schema.json",
4 | "definitions": {
5 | "plugin_item": {
6 | "type": "object",
7 | "properties": {
8 | "fqcn": { "type": "string" },
9 | "kwargs": { "type": "object" }
10 | },
11 | "required": [ "fqcn" ]
12 | }
13 | },
14 | "type": "object",
15 | "description": "A configuration for tuning hyperparameters in a Juneberry model.",
16 | "properties": {
17 | "description": { "type": "string" },
18 | "format_version": { "type": "string" },
19 | "num_samples": { "type": "integer" },
20 | "scheduler": { "$ref": "#/definitions/plugin_item" },
21 | "search_algorithm": { "$ref": "#/definitions/plugin_item" },
22 | "search_space": {
23 | "type": "array",
24 | "items": {
25 | "type": "object",
26 | "properties": {
27 | "fqcn": { "type": "string" },
28 | "hyperparameter_name": { "type": "string" },
29 | "kwargs": { "type": "object" }
30 | },
31 | "required" : [ "hyperparameter_name", "fqcn" ]
32 | }
33 | },
34 | "timestamp": { "type": "string" },
35 | "trial_resources": {
36 | "type": "object",
37 | "properties": {
38 | "cpu": { "type": "integer" },
39 | "gpu": { "type": "integer" }
40 | }
41 | },
42 | "tuning_parameters": {
43 | "type": "object",
44 | "properties": {
45 | "checkpoint_interval": { "type": "integer" },
46 | "metric": { "type": "string" },
47 | "mode": { "type": "string", "enum": [ "min", "max" ] },
48 | "scope": { "type": "string", "enum": [ "all", "last", "avg", "last-5-avg", "last-10-avg" ] }
49 | }
50 | }
51 | },
52 | "required": [ "search_space" ]
53 | }
--------------------------------------------------------------------------------
/juneberry/metrics/classification/metrics.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - General Release
5 | #
6 | # Copyright 2021 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software subject to its own license.
20 | #
21 | # DM21-0884
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | This module provides a common classification metric plugin init to be used with Juneberry.
27 | """
28 | import logging
29 | from typing import Dict
30 |
31 | logger = logging.getLogger(__name__)
32 |
33 |
class MetricsBase:
    """
    Common initialization for classification metrics plugins: stores the plugin parameters and
    rejects a missing name.
    """

    def __init__(self,
                 fqn: str,
                 name: str,
                 kwargs: Dict = None) -> None:
        """
        :param fqn: Stored as-is on the instance.
        :param name: Stored as-is on the instance; must not be empty or None.
        :param kwargs: Stored as-is on the instance; defaults to None.
        :raises ValueError: When name is empty or None. The same message is logged as an error first.
        """
        self.fqn, self.name, self.kwargs = fqn, name, kwargs

        # Nothing more to do when a usable name was provided.
        if name:
            return

        log_msg = f"Unable to init metrics: fqn={self.fqn}, kwargs={self.kwargs}. Missing 'name' parameter."
        logger.error(log_msg)
        raise ValueError(log_msg)
48 |
--------------------------------------------------------------------------------
/juneberry/architectures/pytorch/torchvision.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import importlib
26 | import logging
27 |
28 | logger = logging.getLogger(__name__)
29 |
30 |
class Wrapper:
    """
    Deprecated basic wrapper for torchvision model classes. Every call logs warnings that point to
    juneberry.pytorch.torchvision.Wrapper and its renamed keyword arguments.
    """

    def __call__(self, className, classArgs, num_classes):
        """
        Log the deprecation notice, then look up a class in 'torchvision.models' and instantiate it.
        :param className: Name of the attribute to fetch from 'torchvision.models'.
        :param classArgs: Dictionary that is expanded into keyword arguments for the constructor.
        :param num_classes: Accepted as part of the call signature; not used by this wrapper.
        :return: The object produced by calling the selected class with classArgs.
        """
        deprecation_notice = (
            "This class is deprecated! Please use:",
            " juneberry.pytorch.torchvision.Wrapper.",
            "NOTE! The kwargs names change as well:",
            " className -> class_name",
            " classArgs -> class_args",
        )
        for notice_line in deprecation_notice:
            logger.warning(notice_line)

        model_factory = getattr(importlib.import_module('torchvision.models'), className)
        return model_factory(**classArgs)
45 |
--------------------------------------------------------------------------------
/juneberry/transforms/random_shift.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | Transformer to random shift an image. It requires a configuration with two arguments of the maximum number
27 | of pixels to shift or mirror the image.
28 |
29 | "config": { "max_width": 0.0, "max_height": 0.0 }
30 |
31 | """
32 |
33 | import juneberry.image as jb_img_utils
34 |
35 |
class RandomShift:
    """
    Image transform that delegates to juneberry.image.random_shift_image using the limits given at
    construction time.
    """

    def __init__(self, max_width, max_height):
        """
        :param max_width: Limit passed to random_shift_image as its second argument.
        :param max_height: Limit passed to random_shift_image as its third argument.
        """
        self.max_width, self.max_height = max_width, max_height

    def __call__(self, image):
        """
        Transformation function that is provided a PIL image.
        :param image: The source PIL image.
        :return: The transformed PIL image.
        """
        shift_limits = (self.max_width, self.max_height)
        return jb_img_utils.random_shift_image(image, *shift_limits)
48 |
--------------------------------------------------------------------------------
/juneberry/detectron2/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | from juneberry.pytorch.utils import PyTorchPlatformDefinitions
26 |
27 |
class DT2PlatformDefinitions(PyTorchPlatformDefinitions):
    """
    Platform definitions for the detectron2 package. Extends the PyTorch definitions and overrides
    the platform config suffix so that it is ".yaml".
    """

    # Suffix of the realized platform_config file emitted before training.
    _CONFIG_SUFFIX = ".yaml"

    def get_config_suffix(self) -> str:
        """
        Before training we emit the fully realized configuration file used by the platform. Different backend platforms
        use different file types, and while we name them all "platform_config", they need to have the correct
        suffix and format. This routine returns the suffix used by the platform, such as ".json" or ".yaml".
        This platform uses ".yaml".
        :return: The suffix used when saving realized platform_config file before training.
        """
        return self._CONFIG_SUFFIX

    def has_platform_config(self) -> bool:
        """ :return: Always True for this platform. """
        return True
41 |
--------------------------------------------------------------------------------
/juneberry/schemas/experiment_outline_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "type": "object",
4 | "properties": {
5 | "baseline_config": { "type": "string" },
6 | "description": { "type": "string" },
7 | "filters": { "type": "array" },
8 | "format_version": { "type": "string" },
9 | "model": {
10 | "type": "object",
11 | "properties": {
12 | "filters": { "type": "array" },
13 | "maximum_evaluations": { "type": "integer" }
14 | }
15 | },
16 | "reports": {
17 | "type": "array",
18 | "items": {
19 | "allOf": [{ "$ref": "report_schema.json#/$defs/report"}],
20 | "properties": {
21 | "classes": { "type": "string"},
22 | "test_tag": { "type": "string" }
23 | }
24 | }
25 | },
26 | "tests": {
27 | "type": "array",
28 | "items": {
29 | "type": "object",
30 | "properties": {
31 | "tag": { "type": "string" },
32 | "dataset_path": { "type": "string" },
33 | "classify": { "type": "integer" }
34 | },
35 | "required": [ "classify", "dataset_path", "tag" ]
36 | }
37 | },
38 | "timestamp": {
39 | "type": "string"
40 | },
41 | "variables": {
42 | "type": "array",
43 | "items": {
44 | "type": "object",
45 | "properties": {
46 | "nickname": { "type": "string" },
47 | "config_field": { "type": "string" },
48 | "vals": {
49 | "anyOf": [
50 | { "type": "array" },
51 | { "type": "string" }
52 | ]
53 | }
54 | },
55 | "required": [ "config_field", "nickname", "vals" ]
56 | }
57 | }
58 | },
59 | "required": [ "baseline_config", "reports", "tests", "variables" ]
60 | }
61 |
--------------------------------------------------------------------------------
/scripts/setup_lab.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | from pathlib import Path
27 |
28 |
def create_missing_dir(d: Path) -> None:
    """
    Creates the directory (and any missing parents) when nothing exists at the path yet.
    A message is printed only when the directory is actually created.
    :param d: The directory path to create if missing.
    :return: None
    """
    # Nothing to do (and nothing to report) when the path is already present.
    if d.exists():
        return

    print(f"Creating project dir \"{d}\".")
    d.mkdir(parents=True, exist_ok=True)
33 |
34 |
def create_missing_project_dirs(project_dir: str) -> None:
    """
    Ensures the project directory and its "cache", "dataroot" and "tensorboard" subdirectories exist.
    :param project_dir: Path to the project directory.
    :return: None
    """
    root = Path(project_dir)

    # The root goes first, followed by each of the standard subdirectories.
    wanted_dirs = [root] + [root / name for name in ("cache", "dataroot", "tensorboard")]
    for wanted in wanted_dirs:
        create_missing_dir(wanted)
44 |
45 |
def main():
    """
    Command line entry point: reads the project directory from the command line and creates any
    missing project directories beneath it.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("project_dir", help="Directory containing the Juneberry project.")
    project_dir = arg_parser.parse_args().project_dir
    create_missing_project_dirs(project_dir)
51 |
52 |
53 | if __name__ == "__main__":
54 | main()
55 |
56 |
--------------------------------------------------------------------------------
/juneberry/schemas/rules_list_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "type": "object",
4 | "properties": {
5 | "description": { "type": "string" },
6 | "format_version": { "type": "string" },
7 | "timestamp": { "type": "string" },
8 | "workflows": {
9 | "type": "array",
10 | "items": {
11 | "type": "object",
12 | "properties": {
13 | "name": { "type": "string" },
14 | "rules": {
15 | "type": "array",
16 | "items": {
17 | "type": "object",
18 | "properties": {
19 | "clean_extras": {
20 | "type": "array",
21 | "items": { "type": "string" }
22 | },
23 | "command": {
24 | "type": "array",
25 | "items": { "type": "string" }
26 | },
27 | "doc": { "type": "string" },
28 | "id": { "type": "integer" },
29 | "inputs": {
30 | "type": "array",
31 | "items": { "type": "string" }
32 | },
33 | "outputs": {
34 | "type": "array",
35 | "items": { "type": "string" }
36 | },
37 | "requirements": {
38 | "type": "array",
39 | "items": { "type": "integer" }
40 | }
41 | },
42 | "required": [ "command", "doc", "id", "inputs" ]
43 | }
44 | }
45 | },
46 | "required": [ "name", "rules" ]
47 | }
48 | }
49 | },
50 | "required": [ "workflows" ]
51 | }
52 |
--------------------------------------------------------------------------------
/juneberry/platform.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
class PlatformDefinitions:
    """
    Base set of platform definitions: the model filename, the suffix of the realized platform config
    file and whether a platform config exists.
    """

    def get_model_filename(self) -> str:
        """
        The base implementation supplies no filename and returns None.
        :return: The name of the model file that the trainer saves and what evaluators should load
        """
        return None

    def get_config_suffix(self) -> str:
        """
        Before training we emit the fully realized configuration file used by the platform. Different backend platforms
        use different file types and while Juneberry names them all "platform_config", they need to have the correct
        suffix and format. This routine returns the suffix used by the platform, such as ".json" or ".yaml".
        The default format is ".json".
        :return: The suffix used when saving the realized platform_config file before training.
        """
        default_suffix = ".json"
        return default_suffix

    def has_platform_config(self) -> bool:
        """ :return: True in this base implementation. """
        # TODO: This is somewhat of a hack
        return True
43 |
--------------------------------------------------------------------------------
/test/test_logging.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 | from pathlib import Path
27 | import tempfile
28 |
29 | import juneberry.logging as jb_logging
30 |
31 |
def test_filter_repeated_messages():
    """
    Logs the same message three times through a logger configured with RemoveDuplicatesFilter and
    checks that the log file ends up with exactly one line.
    """
    logger_name = "test_jb_logging"

    with tempfile.TemporaryDirectory() as tmpdir:
        tmplog = Path(tmpdir, "test_jb_logging.out")
        jb_logging.setup_logger(tmplog, "", name=logger_name, level=logging.DEBUG,
                                log_filter_class=jb_logging.RemoveDuplicatesFilter)

        logger = logging.getLogger(logger_name)
        for _ in range(3):
            logger.info("Repeated message.")

        # We logged three messages, but because the messages were duplicates,
        # only one message should have been logged.
        with open(tmplog, 'r') as log_file:
            num_lines = sum(1 for _ in log_file)
        assert num_lines == 1
49 |
--------------------------------------------------------------------------------
/juneberry/schemas/property_inference_attack_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "definitions": {
4 | "plugin": {
5 | "type": "object",
6 | "properties": {
7 | "fqcn": { "type": "string" },
8 | "kwargs": { "type": "object" }
9 | },
10 | "required": [ "fqcn" ]
11 | }
12 | },
13 | "type": "object",
14 | "description": "A configuration for building an attack experiment using basic Juneberry commands.",
15 | "properties": {
16 | "data_configs": {
17 | "type": "object",
18 | "properties": {
19 | "in_out_builder": { "$ref": "#/definitions/plugin" },
20 | "query_data": { "type": "string" },
21 | "training_data": { "type": "string" }
22 | },
23 | "required": ["in_out_builder", "query_data", "training_data"]
24 | },
25 | "models": {
26 | "type": "object",
27 | "properties": {
28 | "meta": { "type": "string" },
29 | "private": { "type": "string" },
30 | "shadow": { "type": "string" },
31 | "shadow_disjoint_quantity": { "type": "integer" },
32 | "shadow_superset_quantity": { "type": "integer" }
33 | },
34 | "required": ["meta", "private", "shadow", "shadow_disjoint_quantity", "shadow_superset_quantity"]
35 | },
36 | "watermarks": {
37 | "type": "object",
38 | "properties": {
39 | "disjoint_args": { "type": "array", "items": { "type": "object" } },
40 | "private_disjoint_args": { "type": "object" },
41 | "private_superset_args": { "type": "object" },
42 | "query_watermarks": { "$ref": "#/definitions/plugin" },
43 | "superset_args": { "type": "array", "items": { "type": "object" } },
44 | "training_watermarks": { "$ref": "#/definitions/plugin" }
45 | },
46 | "required": ["disjoint_args", "private_disjoint_args", "private_superset_args", "query_watermarks",
47 | "superset_args", "training_watermarks"]
48 | }
49 | },
50 | "required": ["data_configs", "models", "watermarks"]
51 | }
--------------------------------------------------------------------------------
/juneberry/transforms/random_crop_mirror.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | Simple transformer to shift an image that may have been mirrored. The JSON configuration requires three arguments:
27 | amount of horizontal shift allowed (in pixels), amount of vertical shift allowed (in pixels) and a boolean to control
28 | whether or not the image should be mirrored.
29 |
30 | "config": { "width_pixels": 0, "height_pixels": 0, "mirror": 0 }
31 |
32 | """
33 |
34 | import juneberry.image as jb_img_utils
35 |
36 |
class RandomCropMirror:
    """
    Callable image transform that delegates to juneberry.image.random_crop_mirror_image using the
    settings supplied at construction time.
    """

    def __init__(self, width_pixels, height_pixels, mirror):
        """
        Remembers the settings that are forwarded on every call.
        :param width_pixels: Value passed to random_crop_mirror_image as its third argument.
        :param height_pixels: Value passed to random_crop_mirror_image as its fourth argument.
        :param mirror: Value passed to random_crop_mirror_image as its second argument.
        """
        self.width_pixels, self.height_pixels = width_pixels, height_pixels
        self.mirror = mirror

    def __call__(self, image):
        """
        Transformation function that is provided a PIL image.
        :param image: The source PIL image.
        :return: The transformed PIL image.
        """
        # Note the argument order expected by the helper: mirror comes before the pixel amounts.
        transformed = jb_img_utils.random_crop_mirror_image(image, self.mirror, self.width_pixels,
                                                            self.height_pixels)
        return transformed
50 |
--------------------------------------------------------------------------------
/juneberry/metrics/objectdetection/brambox/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import json
26 | from typing import Dict, Tuple
27 |
28 | import brambox as bb
29 | from pandas import DataFrame
30 |
31 | from juneberry.config import coco_utils
32 |
33 |
def get_df(anno: Dict, det: Dict) -> Tuple[DataFrame, DataFrame]:
    """
    Build the pair of brambox DataFrames (annotations, detections) to be used in Metrics calls.
    Each dict is serialized to JSON and fed through the matching brambox COCO parser.
    :param anno: a dict of annotations in COCO format
    :param det: a dict of detections in COCO format
    :return: the annotations and detections DataFrames
    """
    # TODO don't need to make an anno_parser every time, it doesn't depend on anno or det
    annotation_parser = bb.io.parser.annotation.CocoParser(parse_image_names=False)
    annotation_parser.deserialize(json.dumps(anno))
    annotations = annotation_parser.get_df()

    # The detection parser is given the class label map derived from the annotations.
    label_map = coco_utils.get_class_label_map(anno)
    detection_parser = bb.io.parser.detection.CocoParser(class_label_map=label_map)
    detection_parser.deserialize(json.dumps(det))
    detections = detection_parser.get_df()

    return annotations, detections
51 |
--------------------------------------------------------------------------------
/test/config/test_training_output.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | from pathlib import Path
26 |
27 | from juneberry.config.training_output import TrainingOutputBuilder
28 | from juneberry.config.model import ModelConfig
29 | import utils
30 |
31 |
def test_builder(tmp_path):
    """
    Populates a TrainingOutputBuilder from a basic model config, fills in the remaining fields and
    saves the result to a JSON file inside the temporary directory.
    :param tmp_path: Temporary directory in which the output file is written.
    """
    builder = TrainingOutputBuilder()

    model_config = ModelConfig.from_dict(utils.make_basic_model_config())
    builder.set_from_model_config("test_config", model_config)

    # The schema should require these things
    options = builder.output.options
    options.num_training_images = 0
    options.num_validation_images = 0
    options.validation_dataset_config_path = ""
    options.training_dataset_config_path = ""

    results = builder.output.results
    results.accuracy = [0.0]
    results.loss = [0.0]
    results.model_hash = ""
    results.val_accuracy = [0.0]
    results.val_loss = [0.0]

    destination = Path(tmp_path, "tmp.json")
    builder.save(str(destination))
53 |
54 |
--------------------------------------------------------------------------------
/juneberry/schemas/tuning_output_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "type": "object",
4 | "properties": {
5 | "format_version": { "type": "string" },
6 | "options": {
7 | "type": "object",
8 | "properties": {
9 | "model_name": { "type": "string" },
10 | "tuning_config": { "type": "string" }
11 | },
12 | "required": [ "model_name", "tuning_config" ]
13 | },
14 | "results": {
15 | "type": "object",
16 | "properties": {
17 | "best_trial_id": { "type": "string" },
18 | "best_trial_params": { "type": "object" },
19 | "trial_results": {
20 | "type": "array",
21 | "items": {
22 | "type": "object",
23 | "properties": {
24 | "directory": {
25 | "type": "string"
26 | },
27 | "id": {
28 | "type": "string"
29 | },
30 | "num_iterations": {
31 | "type": "integer"
32 | },
33 | "params": {
34 | "type": "object"
35 | },
36 | "result_data": {
37 | "type": "object"
38 | }
39 | },
40 | "required": [
41 | "directory",
42 | "id",
43 | "num_iterations",
44 | "params",
45 | "result_data"
46 | ]
47 | }
48 | }
49 | },
50 | "required": [ "best_trial_id", "trial_results" ]
51 | },
52 | "times": {
53 | "type": "object",
54 | "properties": {
55 | "duration": { "type": "number" },
56 | "end_time": { "type": "string" },
57 | "start_time": { "type": "string" }
58 | },
59 | "required": [ ]
60 | }
61 | },
62 | "required": [ "options", "results" ]
63 | }
64 |
--------------------------------------------------------------------------------
/scripts/dry_run_all_models.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | Trivial script to call "dry run" on every model directory that has a config file.
27 | """
28 |
29 | import argparse
30 | import os
31 | from pathlib import Path
32 | import subprocess
33 | import sys
34 |
35 |
def main():
    """
    Runs "jb_train --dryrun" on every model directory in the workspace that contains a config.json.

    The process changes its working directory to the workspace. Model names are the directory of each
    config.json relative to "<workspace>/models", joined with "/". The script stops at the first dry run
    that returns a non-zero code and exits with status -1.
    """
    parser = argparse.ArgumentParser(description="Script to call '--dryrun' on every model directory in "
                                                 "the specified workspace.")
    parser.add_argument("workspace", help="Workspace root (above models).")
    args = parser.parse_args()

    # Resolve to an absolute path BEFORE changing directory. A relative path would otherwise be interpreted
    # against the new working directory, making the glob below search "<workspace>/<workspace>/models".
    workspace = Path(args.workspace).resolve()
    os.chdir(workspace)

    models_dir = workspace / "models"
    for config_path in workspace.glob("models/**/config.json"):
        # Derive the name relative to the models directory so it does not depend on how many path
        # components the workspace argument happened to have.
        model_name = "/".join(config_path.parent.relative_to(models_dir).parts)
        print(f"******** DRY RUN on {model_name}")
        result = subprocess.run(['jb_train', '--dryrun', '-w', str(workspace), model_name])

        if result.returncode != 0:
            print(f"Failed to do dry run '{result.returncode}' on {model_name}. EXITING!!")
            sys.exit(-1)
53 |
54 |
55 | if __name__ == "__main__":
56 | main()
57 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | README
4 | ==========
5 |
6 | # Introduction
7 |
8 | Juneberry improves the experience of machine learning experimentation by providing a framework for automating
9 | the training, evaluation, and comparison of multiple models against multiple datasets, thereby reducing errors and
10 | improving reproducibility.
11 |
12 | This README describes how to use the Juneberry framework to execute machine learning tasks. Juneberry follows a (mostly)
13 | declarative programming model composed of sets of config files (dataset, model, and experiment configurations) and
14 | Python plugins for features such as model construction and transformation.
15 |
16 | If you're looking for a slightly more in-depth description of Juneberry, see [What Is Juneberry](docs/whatis.md).
17 |
18 | Other resources can be found at the [Juneberry Home Page](https://www.sei.cmu.edu/our-work/projects/display.cfm?customel_datapageid_4050=334902)
19 |
20 | # Supporting Documentation
21 |
22 | ## How to Install Juneberry
23 |
24 | The [Getting Started](docs/getting_started.md) documentation explains how to install Juneberry. It also
25 | includes a simple test command you can use to verify the installation.
26 |
27 | ## Experiment Overview
28 |
29 | The [Workspace and Experiment Overview](docs/overview.md) documentation contains information about
30 | the structure of the Juneberry workspace and how to organize experiments.
31 |
32 | ## Experiment Tutorial
33 |
34 | The [Juneberry Basic Tutorial](docs/tutorial.md) describes how to create a model, train the model,
35 | and run an experiment.
36 |
37 | ## Configuring Juneberry
38 |
39 | The [Juneberry Configuration Guide](docs/configuring.md) describes various ways to configure Juneberry.
40 |
41 | ## Known Warnings
42 |
43 | During normal use of Juneberry, you may encounter warning messages. The
44 | [Known Warnings in Juneberry](docs/known_warnings.md) documentation contains information about known warning
45 | messages and what (if anything) should be done about them.
46 |
47 | ## Further Reading
48 |
49 | The [vignettes](docs/vignettes) directory contains detailed walkthroughs of various Juneberry tasks.
50 | The vignettes provide helpful examples of how to construct various Juneberry configuration files,
51 | including datasets, models, and experiments. A good start is
52 | [Replicating a Classic Machine Learning Result with Juneberry](docs/vignettes/vignette1/Replicating_a_Classic_Machine_Learning_Result_with_Juneberry.md).
53 |
54 | # Copyright
55 |
56 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms.
57 |
--------------------------------------------------------------------------------
/juneberry/metrics/classification/sklearn/metrics.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | This module provides a sklearn classification metric plugin to be used with Juneberry.
27 | """
28 | import logging
29 | from typing import Dict
30 |
31 | from juneberry.evaluation import utils as jb_eval_utils
32 | from juneberry.loader import load_verify_fqn_function
33 | from juneberry.metrics.classification.metrics import MetricsBase
34 |
35 | logger = logging.getLogger(__name__)
36 |
37 |
class Metrics(MetricsBase):
    """
    Metrics plugin that loads the function named by the configured fqn and calls it with the targets
    and the class predictions derived from the continuous predictions.
    """

    def __init__(self,
                 fqn: str,
                 name: str,
                 kwargs: Dict = None) -> None:
        """
        :param fqn: Fully qualified name of the metrics function to load.
        :param name: Name associated with this metric.
        :param kwargs: Keyword arguments passed to the metrics function.
        """
        super().__init__(fqn, name, kwargs)

    def __call__(self, target, preds, binary=False):
        """
        Computes the metric.
        :param target: The target values, passed as the first positional argument to the metrics function.
        :param preds: The continuous predictions; converted to classes before the metrics function is called.
        :param binary: Passed through to continuous_predictions_to_class.
        :return: Whatever the loaded metrics function returns.
        :raises ValueError: When the metrics function could not be loaded.
        """
        singular_preds = jb_eval_utils.continuous_predictions_to_class(preds, binary)

        # Placeholder y_true / y_pred entries are supplied alongside the configured kwargs when
        # loading and verifying the function.
        loader_kwargs = {"y_true": [], "y_pred": [], **self.kwargs}
        metrics_function = load_verify_fqn_function(self.fqn, loader_kwargs)

        if metrics_function:
            return metrics_function(target, singular_preds, **self.kwargs)

        log_msg = f"Unable to create metrics function: fqn={self.fqn}, name={self.name}, kwargs={self.kwargs}."
        logger.error(log_msg)
        raise ValueError(log_msg)
56 |
--------------------------------------------------------------------------------
/scripts/predictions_to_coco.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | from pathlib import Path
27 |
28 | import juneberry.config.coco_utils as coco_utils
29 | import juneberry.scripting.utils as jb_scripting
30 |
31 |
def setup_args(parser) -> None:
    """
    Registers this script's three positional arguments on the given parser.
    :param parser: The parser in which to add arguments.
    """
    # (argument name, help text) pairs, registered in positional order.
    positional_args = (
        ('dataset', 'Data set used to drive predictions.'),
        ('predictions', 'Path to predictions file within workspace to convert.'),
        ('output', 'Path to file for coco output.'),
    )
    for arg_name, help_text in positional_args:
        parser.add_argument(arg_name, help=help_text)
40 |
41 |
def main():
    """
    Script entry point: parses the command line, sets up the workspace, and converts the
    predictions file into a coco-formatted annotations file.
    """
    # Build the parser from this script's arguments plus the shared scripting arguments.
    arg_parser = argparse.ArgumentParser(description="Converts predictions output to coco format.")
    setup_args(arg_parser)
    jb_scripting.setup_args(arg_parser)
    parsed = arg_parser.parse_args()

    # TODO: Update jb_scripting to be more clear
    lab = jb_scripting.setup_workspace(parsed, log_file=None)

    data_root = lab.data_root()
    output_path = Path(parsed.output)
    coco_utils.save_predictions_as_anno(data_root=data_root,
                                        dataset_config=parsed.dataset,
                                        predict_file=parsed.predictions,
                                        output_file=output_path)
54 |
55 |
56 | if __name__ == "__main__":
57 | main()
58 |
--------------------------------------------------------------------------------
/scripts/merge_predictions.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | import csv
27 |
28 | import juneberry.filesystem as jb_fs
29 |
30 |
def add_predictions(filepath, id, csvwriter):
    """
    Loads a predictions file and writes one row per prediction to the csv writer.
    Each row is the given id, the label at the same index, then the prediction values.
    :param filepath: Path to the predictions file to load.
    :param id: Value placed in the first column of every row written for this file.
    :param csvwriter: Writer object whose writerow() receives each row.
    """
    results = jb_fs.load_file(filepath)['testResults']
    labels = results['labels']

    for index, scores in enumerate(results['predictions']):
        # Each entry in 'predictions' is concatenated onto a list, so it is assumed to be a list itself.
        row_prefix = [id, labels[index]]
        csvwriter.writerow(row_prefix + scores)
38 |
39 |
def setup_args(parser) -> None:
    """
    Adds this script's positional arguments to the parser: two predictions files to merge
    and the path of the CSV file to write.
    :param parser: The parser in which to add arguments.
    """
    parser.add_argument('predictions0', help='First predictions file.')
    parser.add_argument('predictions1', help='Second predictions file.')
    # This script writes CSV (see main), not coco, so the help text says so.
    parser.add_argument('output', help='Path to file for csv output.')
48 |
49 |
def main():
    """
    Script entry point: parses the command line and merges the two predictions files
    into a single CSV file. Rows from the first file are tagged with id 0 and rows
    from the second file with id 1.
    """
    # Setup and parse all arguments.
    parser = argparse.ArgumentParser(description="Merges and converts two predictions files to a single csv output.")
    setup_args(parser)
    args = parser.parse_args()

    # The csv module expects files opened with newline="" so that it controls line endings
    # itself; without it, extra blank rows can appear on platforms that translate "\n".
    with open(args.output, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        add_predictions(args.predictions0, 0, csv_writer)
        add_predictions(args.predictions1, 1, csv_writer)
60 |
61 | if __name__ == "__main__":
62 | main()
--------------------------------------------------------------------------------
/juneberry/metrics/classification/torchnn/metrics.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | This module provides a torchnn classification metric plugin to be used with Juneberry.
27 | """
28 | import logging
29 | from typing import Dict
30 |
31 | import torch
32 |
33 | from juneberry.loader import construct_instance
34 | from juneberry.metrics.classification.metrics import MetricsBase
35 | import juneberry.pytorch.utils as pyt_utils
36 |
37 | logger = logging.getLogger(__name__)
38 |
39 |
class Metrics(MetricsBase):
    """
    Classification metrics plugin that builds a callable from a fully qualified name and
    applies it to tensor versions of the predictions and targets.
    """

    def __init__(self,
                 fqn: str,
                 name: str,
                 kwargs: Dict = None) -> None:
        """
        Forwards all arguments unchanged to MetricsBase.
        :param fqn: Fully qualified name of the metric to construct.
        :param name: Name of this metric; used in error messages.
        :param kwargs: Keyword arguments for the metric.
        """
        super().__init__(fqn, name, kwargs)

    def __call__(self, target, preds, binary=False):
        """
        Computes the metric for the given targets and predictions.
        :param target: Target values; converted to a torch.LongTensor.
        :param preds: Predicted values; converted to a torch.FloatTensor.
        :param binary: When True, the metric is wrapped with function_wrapper_unsqueeze_1 before the call.
        :return: The metric result converted via .numpy().
        :raises ValueError: If no metric could be constructed from the fqn.
        """
        target = torch.LongTensor(target)
        preds = torch.FloatTensor(preds)

        # NOTE(review): self.kwargs is passed both here and to the call below - confirm this is intended.
        metric = construct_instance(self.fqn, self.kwargs)
        if not metric:
            log_msg = f"Unable to create metrics function: fqn={self.fqn}, name={self.name}, kwargs={self.kwargs}."
            logger.error(log_msg)
            raise ValueError(log_msg)

        if binary:
            metric = pyt_utils.function_wrapper_unsqueeze_1(metric)

        # The metric is called with predictions first, then targets.
        return metric(preds, target, **self.kwargs).numpy()
61 |
--------------------------------------------------------------------------------
/juneberry/scripting/tuning_sprout.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 | from argparse import Namespace
25 | from dataclasses import dataclass
26 | import logging
27 |
28 | from juneberry.scripting.sprout import Sprout
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 |
@dataclass
class TuningSprout(Sprout):
    """
    Sprout subclass that additionally captures the script arguments related to model tuning.
    """
    # Execution mode: whether the script was asked to do a dry run.
    dryrun: bool = None

    # Model selection.
    model_name: str = None

    # Tuning configuration.
    tuning_config: str = None

    def grow_from_args(self, args: Namespace) -> None:
        """
        Populates the base Sprout attributes and then the tuning attributes from a Namespace.
        Arguments missing from the Namespace fall back to False (dryrun) or None.
        :param args: A Namespace of arguments, typically created by passing arguments to a Juneberry script.
        :return: Nothing.
        """
        # The base class fills in the attributes common to all scripts.
        super().grow_from_args(args)

        # (attribute on this object, attribute on args, fallback when absent)
        tuning_args = (
            ("dryrun", "dryrun", False),
            ("model_name", "modelName", None),
            ("tuning_config", "tuningConfig", None),
        )
        for attribute, arg_name, fallback in tuning_args:
            setattr(self, attribute, getattr(args, arg_name, fallback))
61 |
--------------------------------------------------------------------------------
/docker/run_container_minimal.sh:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env bash
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | # =================================================================================================
26 | # WARNING: These images and scripts create containers with NO SECURITY PRACTICES, such as
27 | # separate user accounts, unprivileged users, etc.
28 | #
29 | # USE AT YOUR OWN RISK
30 | # =================================================================================================
31 |
32 | # This script provides a starting point for creating your own container launcher. If your layout
33 | # follows the basic Juneberry lab layout, then this script should basically work as-is.
34 | #
35 | # Run this script from inside your workspace of choice.
36 |
# The workspace is the current directory; the lab is its parent and holds the sibling
# directories (juneberry, dataroot, tensorboard, cache) mounted into the container.
WS="${PWD}"
LAB="$(dirname "$WS")"
CACHE="${LAB}/cache"

# All expansions are quoted so that paths or user names containing spaces or glob
# characters are passed to docker as single arguments instead of being word-split.
docker run -it --rm --network=host --ipc=host --name "${USER}" \
    --env HTTP_PROXY --env http_proxy --env HTTPS_PROXY --env https_proxy --env NO_PROXY --env no_proxy \
    -e USER_NAME="${USER}" -e USER_ID="$(id -u "${USER}")" -e USER_GID="$(id -g "${USER}")" -e HOST_UNAME="$(uname)" \
    -v "${WS}":/workspace -w /workspace \
    -v "${LAB}/juneberry":/juneberry \
    -v "${LAB}/dataroot":/dataroot:ro \
    -v "${LAB}/tensorboard":/tensorboard \
    -v "${CACHE}/hub":/root/.cache/torch/hub \
    -v "${CACHE}/torch":/root/.torch \
    -v "${CACHE}/tensorflow":/root/tensorflow_datasets \
    cmusei/juneberry:cpudev \
    bash
--------------------------------------------------------------------------------
/test/pytorch/test_utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | from juneberry.config.model import PytorchOptions
26 | import juneberry.pytorch.utils as pyt_utils
27 |
28 |
class DummyLoss:
    """
    Stand-in loss used by test_make_loss. Construction marks the model it is given, and
    calling it adds its two arguments.
    """

    def __init__(self, model):
        # Leave a marker in the model so the test can confirm the model reached the constructor.
        model['return'] = 'World'

    def __call__(self, predicted, target):
        # Addition keeps the result trivially checkable.
        total = predicted + target
        return total
35 |
36 |
def test_make_loss():
    """
    Checks that make_loss builds the configured loss class with the model and returns a usable callable.
    """
    # NOTE: This is run with the current directory (not the root test directory) in the python path
    options = PytorchOptions.from_dict({'loss_fn': 'pytorch.test_utils.DummyLoss'})
    fake_model = {'input': 'Hello'}

    loss_fn = pyt_utils.make_loss(options, fake_model, False)

    # DummyLoss marks the model during construction and adds its arguments when called.
    assert fake_model['return'] == 'World'
    assert loss_fn(2, 3) == 5
45 |
46 |
class DummyLR:
    """
    Stand-in learning rate scheduler used by test_make_lr_schedule. It only records its
    constructor arguments so the test can inspect them.
    """

    def __init__(self, optimizer, epochs, foo):
        self.optimizer, self.epochs, self.foo = optimizer, epochs, foo
52 |
53 |
def test_make_lr_schedule():
    """
    Checks that make_lr_scheduler builds the configured scheduler and that the epochs passed
    to it take precedence over the epochs in lr_schedule_args.
    """
    schedule_args = {"epochs": 25, "foo": "bar"}
    lr_options = PytorchOptions.from_dict({
        "lr_schedule_args": schedule_args,
        "lr_schedule_fn": "pytorch.test_utils.DummyLR"
    })

    # The epochs passed here (10) should override the configured value (25).
    scheduler = pyt_utils.make_lr_scheduler(lr_options, "hello", 10)

    assert scheduler.optimizer == "hello"
    assert scheduler.epochs == 10
    assert scheduler.foo == "bar"
68 |
--------------------------------------------------------------------------------
/juneberry/detectron2/transforms.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 |
27 | import numpy as np
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 |
class DT2NoOp:
    """
    A do-nothing transformer that shows every extension point available when writing
    your own DT2 Transform class. Each method returns its argument unchanged.
    """

    def apply_image(self, img: np.ndarray) -> np.ndarray:
        """Returns the image unchanged."""
        return img

    def apply_box(self, box: np.ndarray) -> np.ndarray:
        """Returns the box unchanged."""
        return box

    def apply_coords(self, coords: np.ndarray) -> np.ndarray:
        """Returns the coordinates unchanged."""
        return coords

    def apply_polygons(self, polygons: list) -> list:
        """Returns the polygons unchanged."""
        return polygons

    def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray:
        """Returns the segmentation unchanged."""
        return segmentation
51 |
52 |
class DT2Logger:
    """
    A pass-through transformer that logs each call it receives, along with a configurable
    message and the type of the argument, then returns the argument unchanged.
    """

    def __init__(self, msg="None"):
        # The default is the string "None", not the None object.
        self.msg = msg

    def apply_coords(self, coords: np.ndarray):
        """Logs the call and returns the coordinates unchanged."""
        logger.info(f"apply_coords: msg={self.msg}, coords-type={type(coords)}")
        return coords

    def apply_polygons(self, polygons: list) -> list:
        """Logs the call and returns the polygons unchanged."""
        logger.info(f"apply_polygons: msg={self.msg}, polygons-type={type(polygons)}")
        return polygons

    def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray:
        """Logs the call and returns the segmentation unchanged."""
        logger.info(f"apply_segmentation: msg={self.msg}, segmentation-type={type(segmentation)}")
        return segmentation
68 |
--------------------------------------------------------------------------------
/test/test_image.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | Unit tests for juneberry.image for use by pytest.
27 | """
28 |
29 | import numpy as np
30 | from PIL import Image
31 |
32 | import juneberry.image as jb_image
33 |
34 |
def make_raw_images():
    """
    Builds ten 10x10 RGB images, each a uniform gray stepping from 0% to 90% in 10% increments.
    :return: A list of PIL images.
    """
    def solid_gray(gray):
        # The same percentage is used for all three channels.
        gray_frac = gray * 10
        return Image.new('RGB', (10, 10), f"rgb({gray_frac}%, {gray_frac}%, {gray_frac}%)")

    return [solid_gray(gray) for gray in range(0, 10)]
43 |
44 |
def test_compute_elementwise_mean() -> None:
    """
    Runs a single 2x2 case to confirm the elementwise mean gives the basic expected results.
    """
    first = np.array(range(1, 5), dtype='uint8')
    second = np.array(range(10, 50, 10), dtype='uint8')

    # Expected values are the truncated integer mean of each pair of elements.
    raw_correct = [int((x + y) / 2) for x, y in zip(first, second)]
    correct = np.array(raw_correct).reshape((2, 2))

    first = first.reshape((2, 2))
    second = second.reshape((2, 2))
    results = jb_image.compute_elementwise_mean(np.array([first, second]))

    for row, col in np.ndindex(2, 2):
        assert correct[row][col] == results[row][col]
64 |
65 |
def test_channel_means() -> None:
    """
    Checks that the per-channel means of the generated gray images match the expected value
    for each of the three channels.
    """
    arrays = [np.array(image) for image in make_raw_images()]
    results = jb_image.compute_channel_means(arrays)

    # The images are gray, so the same mean is expected for every channel.
    expected_mean = 0.45098039215686275
    for channel in range(3):
        assert results[channel] == expected_mean
73 |
--------------------------------------------------------------------------------
/docs/software_maintenance.md:
--------------------------------------------------------------------------------
1 | Maintenance
2 | ============
3 |
4 | # Overview
5 |
6 | This page covers the coding standards, conventions, and design notes used to maintain Juneberry.
7 |
8 | # Python
9 | ## Installation & Usage
10 | - It is suggested that you use a virtual environment
11 | - The choice is yours, but the use of [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) is suggested
12 | - This framework is designed for python versions 3.7+
13 | - You can use [pyenv](https://github.com/pyenv/pyenv) to manage different python versions on your computer
14 | - Required python packages are documented in the Pipfile
15 | ## Coding Guidelines
16 | - Follow standard Python naming conventions.
17 | - Configure logging for info
18 |
19 | ### Structure
20 | For coding structure we like to separate the argument parsing from the business functionality to allow the
21 | script to be loaded externally and have the business functions called. The usual pattern is to have the main()
22 | routine do all the argument parsing, validity checks, open files, and read configs then call the business functions.
23 | So, something like:
24 |
25 | ```
26 | #! /usr/bin/env python3
27 |
28 | import argparse
29 | import logging
30 |
31 |
32 | # Use some better name than business logic...
33 | def hello_world(data_root):
34 | print(f"Hello world from {data_root}")
35 |
36 |
37 | def main():
38 | logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
39 |
40 | parser = argparse.ArgumentParser(description="Pithy comment here.")
41 | parser.add_argument('dataRoot', help='Root of data directory')
42 |
43 | args = parser.parse_args()
44 |
45 | hello_world(args.dataRoot)
46 |
47 |
48 | if __name__ == "__main__":
49 | main()
50 | ```
51 |
52 | ## JSON
53 | - snake_case names for properties
54 | - 4 space indent
55 |
56 | ## Git
57 | - We use standard gitflow style
58 | - We default to squash on merges
59 | - When branches get confusing, prefer to rebase to a new branch with the suffix "-merge"
60 | - Most tasks are features
61 |
62 | # Design Notes
63 |
64 | ## Config files
65 | Juneberry is a config-driven system. Different types of config files store different concerns, for example the structure
66 | of a model would be in one type of config, while the contents of a dataset would be defined in another. Regardless of
67 | config type, a similar process is used to load config data, so all configs follow a similar philosophy.
68 |
69 | 1) Files or blobs are in common Python-friendly formats such as JSON, YAML, or TOML
70 | 1) Files are loaded into pure python data structures
71 | 1) Version conversions are performed
72 | 1) Data structures are validated using jsonschema regardless of file format
73 | 1) Validated structures are loaded into convenience data models such as Prodict.
74 |
75 | # Copyright
76 |
77 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms.
78 |
--------------------------------------------------------------------------------
/test/test_utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import juneberry.utils as jb_utils
26 |
27 |
def setup_data():
    """
    Builds the fixtures shared by the rekey tests.
    :return: A tuple of (input data with mixed-case keys, the same data with snake_case keys,
        the mapping from each mixed-case key to its snake_case form).
    """
    # Input structure; 'nested' and 'okay' are already snake_case and stay as they are.
    mixed_inner = {'subKeyA': 'Frodo', 'subKeyB': 'Sam', 'okay': 'Merry'}
    test_data = {
        "someKey": 1,
        "otherKey": 2,
        'nested': {'arrayKey': [1, 2, 3], 'dictKey': mixed_inner},
    }

    # The same structure after every key has been converted.
    snake_inner = {'sub_key_a': 'Frodo', 'sub_key_b': 'Sam', 'okay': 'Merry'}
    expected_data = {
        "some_key": 1,
        "other_key": 2,
        'nested': {'array_key': [1, 2, 3], 'dict_key': snake_inner},
    }

    # Only keys that actually change appear in the map.
    key_map = dict(
        someKey='some_key',
        otherKey='other_key',
        arrayKey='array_key',
        dictKey='dict_key',
        subKeyA='sub_key_a',
        subKeyB='sub_key_b',
    )

    return test_data, expected_data, key_map
65 |
66 |
def test_rekey():
    """
    Checks that rekey converts the keys of the test data in place using an explicit key map.
    """
    data, expected, mapping = setup_data()

    # Convert using the known mapping; the input structure itself is compared afterwards.
    jb_utils.rekey(data, mapping)
    assert expected == data
73 |
74 |
def test_snake_case():
    """
    Checks that mixed_to_snake_struct_keys converts the keys of the test data in place and
    returns the mapping of the keys it changed.
    """
    data, expected, expected_map = setup_data()

    # Convert using the algorithm rather than a supplied mapping.
    produced_map = jb_utils.mixed_to_snake_struct_keys(data)
    assert expected == data
    assert expected_map == produced_map
82 |
--------------------------------------------------------------------------------
/test/test_transforms.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | The vast majority of transformers just wrap specific functional calls such
27 | as calls in the image support. We expect those tests to cover that part of the
28 | functionality.
29 |
30 | These tests are to make sure that the transforms have the proper signature
31 | and can be loaded by the transform_manager. Thus, in most cases all we need
32 | to do is to pass the configuration into the transform manager.
33 |
34 | """
35 |
36 | import juneberry.transforms.transform_manager
37 |
38 |
def test_load_random_crop():
    """
    Checks that the transform manager can load RandomCropMirror from its configuration.
    """
    crop_entry = {
        'fqcn': 'juneberry.transforms.random_crop_mirror.RandomCropMirror',
        'kwargs': {"width_pixels": 0, "height_pixels": 0, "mirror": 0}
    }

    manager = juneberry.transforms.transform_manager.TransformManager([crop_entry])
    assert len(manager) == 1
49 |
50 |
def test_load_mirror_flip():
    """
    Checks that the transform manager can load RandomMirrorFlip from its configuration.
    """
    flip_entry = {
        'fqcn': 'juneberry.transforms.random_mirror_flip.RandomMirrorFlip',
        'kwargs': {"mirror_chance": 0.0, "flip_chance": 0.0}
    }

    manager = juneberry.transforms.transform_manager.TransformManager([flip_entry])
    assert len(manager) == 1
61 |
62 |
def test_load_random_shift():
    """
    Checks that the transform manager can load RandomShift from its configuration.
    """
    shift_entry = {
        'fqcn': 'juneberry.transforms.random_shift.RandomShift',
        'kwargs': {"max_width": 0.0, "max_height": 0.0}
    }

    manager = juneberry.transforms.transform_manager.TransformManager([shift_entry])
    assert len(manager) == 1
73 |
--------------------------------------------------------------------------------
/juneberry/scripting/sprout.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 | from argparse import Namespace
25 | from dataclasses import dataclass
26 | import logging
27 |
28 | logger = logging.getLogger(__name__)
29 |
30 |
@dataclass
class Sprout:
    """
    Captures the arguments passed into Juneberry scripts. This base class holds the
    arguments that are common to all scripts.
    """
    # Directory arguments.
    workspace_dir: str = None
    dataroot_dir: str = None
    tensorboard_dir: str = None
    log_dir: str = None

    # Logging arguments.
    silent: bool = None
    log_level: int = None

    # Lab arguments.
    profile_name: str = None

    def grow_from_args(self, args: Namespace) -> None:
        """
        Copies values from a Namespace of arguments onto the matching Sprout attributes.
        Arguments missing from the Namespace fall back to None (False for silent).
        :param args: A Namespace of arguments, typically created by passing arguments to a Juneberry script.
        :return: Nothing.
        """
        # (attribute on this object, attribute on args, fallback when absent)
        direct_args = (
            ("workspace_dir", "workspace", None),
            ("dataroot_dir", "dataRoot", None),
            ("tensorboard_dir", "tensorboard", None),
            ("log_dir", "logDir", None),
            ("silent", "silent", False),
            ("profile_name", "profileName", None),
        )
        for attribute, arg_name, fallback in direct_args:
            setattr(self, attribute, getattr(args, arg_name, fallback))

        # The log level is derived from the verbose flag: DEBUG when it is truthy, INFO otherwise.
        verbose = getattr(args, "verbose", None)
        if verbose:
            self.log_level = logging.DEBUG
        else:
            self.log_level = logging.INFO
64 |
--------------------------------------------------------------------------------
/test/moddir/simple_mod.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 |
def binary_function(a, b):
    """Render both arguments as text and join them with the word 'and'."""
    return "{} and {}".format(a, b)
28 |
29 |
class MyClass:
    # Sample class exposing a single static, one-argument function.
    @staticmethod
    def unary(a):
        """Return a short sentence stating the value of the argument."""
        return "a is {}".format(a)
34 |
35 |
class ClassWithInit:
    # Sample callable that is constructed with a name and takes no call arguments.
    def __init__(self, name):
        self.name = name

    def __call__(self):
        """Return the stored name rendered as text."""
        return format(self.name)

    def get_name(self):
        """Return the stored name unchanged."""
        return self.name
45 |
46 |
class ClassWithInitAndUnaryCall:
    # Sample callable that is constructed with a name and called with one argument.
    def __init__(self, name):
        self.name = name

    def __call__(self, arg):
        """Return the stored name and the argument separated by a single space."""
        return "{} {}".format(self.name, arg)

    def get_name(self):
        """Return the stored name unchanged."""
        return self.name
56 |
57 |
class ClassWithUnaryCallWithOptArg1:
    # Sample callable taking one argument plus the optional keyword "opt1".
    def __init__(self):
        self.name = "No name"

    def __call__(self, arg, opt1=None):
        """Return the argument and opt1 separated by a single space."""
        return "{} {}".format(arg, opt1)

    def get_name(self):
        """Return the fixed name assigned at construction."""
        return self.name
67 |
68 |
class ClassWithUnaryCallWithOptArg2:
    # Sample callable taking one argument plus the optional keyword "opt2".
    def __init__(self):
        self.name = "No name"

    def __call__(self, arg, opt2=None):
        """Return the argument and opt2 separated by a single space."""
        return "{} {}".format(arg, opt2)

    def get_name(self):
        """Return the fixed name assigned at construction."""
        return self.name
78 |
79 |
class LabeledTransformExample:
    # Sample transform that also receives, and returns, a label.
    def __init__(self):
        self.name = "No name"

    def __call__(self, arg, *, label, opt1=None):
        """
        Return a pair: the argument and opt1 joined by a space, and the label converted
        to an int and incremented by one.
        """
        rendered = "{} {}".format(arg, opt1)
        bumped_label = int(label) + 1
        return rendered, bumped_label

    def get_name(self):
        """Return the fixed name assigned at construction."""
        return self.name
89 |
90 |
def transform_maker():
    """Build and return a one-argument callable that adds its input to itself."""
    def add_to_itself(x):
        return x + x

    return add_to_itself
93 |
94 |
def transform_maker_arg(y):
    """Build and return a one-argument callable that computes y + x for its input x."""
    def add_to_y(x):
        return y + x

    return add_to_y
97 |
--------------------------------------------------------------------------------
/juneberry/tensorflow/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import hashlib
26 | import io
27 | import logging
28 | import sys
29 |
30 | import tensorflow as tf
31 |
32 | from juneberry.platform import PlatformDefinitions
33 | import juneberry.utils as jb_utils
34 |
35 | logger = logging.getLogger(__name__)
36 |
37 |
class TensorFlowPlatformDefinitions(PlatformDefinitions):
    # Platform definitions used for TensorFlow models.
    def get_model_filename(self) -> str:
        """
        Provide the file name shared by the trainer (which saves it) and the evaluators (which load it).
        :return: The name of the model file.
        """
        model_filename = "model.h5"
        return model_filename
42 |
43 |
def save_summary(model, summary_file_path):
    """
    Write the text that model.summary() prints to stdout into a file.
    The file is always closed and sys.stdout is always restored, even when summary() raises.
    :param model: An object whose summary() method prints to sys.stdout (presumably a Keras model).
    :param summary_file_path: Path of the file to create or overwrite, encoded as UTF-8.
    :return: Nothing.
    """
    # Imported locally so this function does not depend on a change to the file-level imports.
    from contextlib import redirect_stdout

    # The context managers close the file and put the previous sys.stdout back on every exit path.
    with open(summary_file_path, 'w', encoding="utf-8") as summary_file, redirect_stdout(summary_file):
        model.summary()
49 |
50 |
def hash_summary(model):
    """
    Compute a SHA-256 digest of the text that model.summary() prints to stdout.
    sys.stdout is always restored and the capture buffer released, even when summary() raises.
    :param model: An object whose summary() method prints to sys.stdout (presumably a Keras model).
    :return: The hexadecimal SHA-256 digest of the UTF-8 encoded summary text.
    """
    # Imported locally so this function does not depend on a change to the file-level imports.
    from contextlib import redirect_stdout

    # Capture the summary in an in-memory buffer; both context managers clean up on every exit path.
    with io.StringIO() as buffer:
        with redirect_stdout(buffer):
            model.summary()
        summary_text = buffer.getvalue()

    return hashlib.sha256(summary_text.encode('utf-8')).hexdigest()
68 |
69 |
def set_tensorflow_seeds(seed: int):
    """
    Apply one seed value to the shared Juneberry seed helper and then to TensorFlow.
    :param seed: The random seed to apply. Must not be None.
    """
    # Seed everything handled by the shared Juneberry helper first.
    jb_utils.set_seeds(seed)

    # Then log and apply the TensorFlow-specific seed.
    logger.info(f"Setting TensorFlow seed to: {str(seed)}")
    tf.random.set_seed(seed)
78 |
--------------------------------------------------------------------------------
/test/test_tabular_data_set.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import csv
26 | import math
27 |
28 | import juneberry.data
29 | import juneberry.pytorch.tabular_dataset as tabular
30 |
31 |
def make_sample_csv(tmp_path, filename, content):
    """
    Write rows of values to a comma-delimited CSV file.
    :param tmp_path: Directory (a Path) in which the file is created.
    :param filename: Name of the CSV file to create inside tmp_path.
    :param content: Iterable of rows, where each row is an iterable of cell values.
    :return: Nothing.
    """
    # The csv module requires newline='' so the writer controls line endings itself;
    # without it, platforms that translate newlines produce blank lines between rows.
    with open(tmp_path / filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerows(content)
37 |
38 |
def test_csv_loader(tmp_path):
    """Write two small CSV files, load them into a TabularDataset, and check every row and label."""
    header = ["col1", "col2", "col3"]
    file_contents = {
        "file1.csv": [header, [0.1, 0.2, 1], [0.3, 0.4, 0]],
        "file2.csv": [header, [0.5, 0.6, 0], [0.7, 0.8, 2]],
    }
    for filename, content in file_contents.items():
        make_sample_csv(tmp_path, filename, content)

    csv_paths = [tmp_path / filename for filename in file_contents]
    labeled_data = juneberry.data.load_labeled_csvs(csv_paths, 2)

    # Flatten the loaded data into (row, label) pairs and wrap them in the data set.
    rows_labels = juneberry.data.flatten_dict_to_pairs(labeled_data)
    ds = tabular.TabularDataset(rows_labels, None)

    # Expected (first value, second value, label) for each entry, in data set order.
    expected_entries = [
        (0.1, 0.2, 1),
        (0.3, 0.4, 0),
        (0.5, 0.6, 0),
        (0.7, 0.8, 2),
    ]
    assert 4 == len(ds)

    for index, (first, second, expected_label) in enumerate(expected_entries):
        row, label = ds[index]
        assert math.isclose(row[0], first, rel_tol=1e-2)
        assert math.isclose(row[1], second, rel_tol=1e-2)
        assert label == expected_label
71 |
--------------------------------------------------------------------------------
/juneberry/scripting/training_sprout.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 | from argparse import Namespace
25 | from dataclasses import dataclass
26 | import logging
27 |
28 | from juneberry.scripting.sprout import Sprout
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 |
@dataclass()
class TrainingSprout(Sprout):
    """
    A TrainingSprout adds the arguments used when training models in Juneberry to those
    already held by the base Sprout.
    """
    # ========== SCRIPT ARGS ==========
    # ===== EXECUTION MODE ARGS =====
    dryrun: bool = None
    num_gpus: int = None
    resume: bool = None

    # ===== OUTPUT FORMAT ARGS =====
    onnx: bool = None
    skip_native: bool = None

    # ===== MODEL ARGS =====
    model_name: str = None

    def grow_from_args(self, args: Namespace) -> None:
        """
        Populate the attributes of this Sprout from a Namespace of script arguments. Any argument
        missing from the Namespace falls back to a default value.
        :param args: A Namespace of arguments, typically created by passing arguments to a Juneberry script.
        :return: Nothing.
        """
        # Let the base Sprout fill in the attributes it owns.
        super().grow_from_args(args)

        # Training attributes: (Sprout attribute, Namespace attribute, fallback value).
        training_copies = (
            ("model_name", "modelName", None),
            ("num_gpus", "num_gpus", None),
            ("dryrun", "dryrun", False),
            ("resume", "resume", False),
            ("skip_native", "skipNative", False),
            ("onnx", "onnx", False),
        )
        for sprout_attr, arg_attr, fallback in training_copies:
            setattr(self, sprout_attr, getattr(args, arg_attr, fallback))
68 |
--------------------------------------------------------------------------------
/juneberry/reporting/report.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 | from pathlib import Path
27 |
28 | logger = logging.getLogger(__name__)
29 |
30 |
class Report:
    """
    This is the base class for all reports.
    """
    def __init__(self, output_str: str = ""):
        """
        Work out the output directory for the report and make sure it exists.
        :param output_str: Path to the report's output directory, or to a file inside it. An empty
        string selects the current working directory.
        :raises FileExistsError: If the chosen output directory path exists but is not a directory.
        """
        if output_str == "":
            # No output path was provided, so fall back to the current directory.
            logger.warning("An output path for the report was not provided. Saving the report to the "
                           "current working directory.")
            self.output_dir = Path.cwd()
        else:
            self.output_dir = Path(output_str)

            # A "." in the final path component is taken to mean a file extension, i.e. the path
            # names a file, so the report directory is that file's parent. The 'name' property is
            # used because it is simply empty for paths such as "." that have no components,
            # whereas indexing 'parts' raises an IndexError for them.
            if "." in self.output_dir.name:
                self.output_dir = self.output_dir.parent

        # Create the output directory (and any parent directories). exist_ok avoids a race
        # between checking for the directory and creating it.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def create_report(self) -> None:
        """
        Creates the report file and writes it to the desired output file. The base class only
        logs a warning; the method is presumably meant to be overridden by subclasses.
        :return: Nothing
        """
        logger.warning("'create_report' is not implemented in the base Report class.")
63 |
--------------------------------------------------------------------------------
/juneberry/transforms/random_mirror_flip.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | Simple transformer for mirroring or flipping an image. The JSON configuration accepts two arguments: the chance to mirror and the chance to flip.
27 |
28 | "config": { "mirror_chance": 0.0, "flip_chance": 0.0 }
29 |
30 | """
31 |
32 | import logging
33 | import sys
34 |
35 | import juneberry.image as jb_img_utils
36 |
37 | logger = logging.getLogger(__name__)
38 |
39 |
class RandomMirrorFlip:
    # Transform that passes an image, together with the configured mirror and flip chances,
    # to juneberry.image.random_mirror_flip.
    def __init__(self, mirror_chance=None, flip_chance=None):
        """
        Store and validate the configured chances. The process exits with status -1 when neither
        chance is provided or when a provided chance is outside the inclusive range 0 to 1.
        :param mirror_chance: Optional chance of mirroring the image, between 0 and 1 inclusive.
        :param flip_chance: Optional chance of flipping the image, between 0 and 1 inclusive.
        """
        self.mirror_chance = mirror_chance
        self.flip_chance = flip_chance

        if self.mirror_chance is None and self.flip_chance is None:
            logger.error("Neither 'mirror_chance' nor 'flip_chance' was specified. EXITING.")
            sys.exit(-1)

        self._validate_chance("mirror_chance", self.mirror_chance)
        self._validate_chance("flip_chance", self.flip_chance)

    @staticmethod
    def _validate_chance(name, value):
        """Exit with status -1 if a provided chance lies outside the inclusive range 0 to 1."""
        if value is not None and not 0 <= value <= 1:
            # The fragments are separated explicitly so the logged message reads as sentences.
            logger.error(f"{name} must be a value in range [0,1]. "
                         f"{name} value was {value}. "
                         f"EXITING.")
            sys.exit(-1)

    def __call__(self, image):
        """
        Transformation function that is provided a PIL image.
        :param image: The source PIL image.
        :return: The transformed PIL image.
        """
        return jb_img_utils.random_mirror_flip(image, self.mirror_chance, self.flip_chance)
69 |
--------------------------------------------------------------------------------
/docs/specs/rules_list_specification.md:
--------------------------------------------------------------------------------
1 | Workflow Rules List
2 | ===============
3 |
4 | # Introduction
5 |
6 | This document describes the specification used by Juneberry when expressing a set of "build rules" to
7 | be used when building Juneberry experiments.
8 |
9 | # Schema
10 |
11 | ```
12 | Rule based version
13 | {
14 | "description": ,
15 | "format_version": ,
16 | "timestamp": ,
17 | "workflows": [
18 | {
19 | "name": "",
20 | "rules": [
21 | {
22 | "clean_extras": [ ],
23 | "id": 0,
24 | "doc": "short documentation string for the rule.",
25 | "inputs": [ ],
26 | "outputs": [ ],
27 | "command": [ ],
28 | "requirements": [ ]
29 | }
30 | ]
31 | }
32 | ]
33 | }
34 | ```
35 |
36 | # Details
37 | This section provides the details of each of the fields.
38 |
39 | ## description
40 | **Optional** prose description of this rules list.
41 |
42 | ## format_version
43 | Linux-style version of the **format** of the file. Not the version of
44 | the data, but the version of the semantics of the fields of this file.
45 | The current version: 0.1.0
46 |
47 | ## timestamp
48 | **Optional** time stamp (ISO format) for when this config was last modified.
49 |
50 | ## workflows
51 | This section contains a list of workflows that can be performed for the experiment.
52 |
53 | ### name
54 | The name of this workflow.
55 |
56 | ### rules
57 | A list of rules that are to be performed to complete this workflow. The rules must be ordered such that, when executed
58 | in the order provided, all the prerequisite inputs will be generated for subsequent rules. However, not every “rules”
59 | entry necessarily requires every previous “rules” entry. Thus, the order can be a **depth-first** or **breadth-first**
60 | representation of the dependencies.
61 |
62 | #### clean_extras
63 | Sometimes after a task we want to clean additional files that aren't necessarily known
64 | ahead of time, so they cannot be listed as explicit targets. This property can be used
65 | to list glob patterns such as `*.png` for finding additional pieces to clean.
66 |
67 | #### id
68 | A unique id of the rule.
69 |
70 | #### doc
71 | A short documentation string to display when listing rules or for log output.
72 |
73 | #### inputs
74 | A list of all input files that are required to execute this rule.
75 |
76 | #### outputs
77 | A list of outputs that are generated by this rule.
78 |
79 | #### command
80 | The command required to execute this rule.
81 |
82 | #### requirements
83 | A list of immediate prerequisite rule ids required by this rule.
84 |
85 |
86 | # Copyright
87 |
88 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms.
89 |
--------------------------------------------------------------------------------
/juneberry/config/report.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 | import sys
27 |
28 | from prodict import List, Prodict
29 |
30 | from juneberry.config.plugin import Plugin
31 | import juneberry.config.util as jb_conf_utils
32 | import juneberry.filesystem as jb_fs
33 |
34 | logger = logging.getLogger(__name__)
35 |
36 |
class ReportConfig(Prodict):
    # Config object for a report configuration: a list of report plugins.
    FORMAT_VERSION = '0.1.0'
    SCHEMA_NAME = 'report_schema.json'
    reports: List[Plugin]

    @staticmethod
    def construct(data: dict, file_path: str = None):
        """
        Validate the data against the report schema and build a config object from it. The data
        is expected to already be in the VALID and LATEST report format. The process exits with
        status -1 when validation fails.
        :param data: The data to use to construct the object.
        :param file_path: Optional path to a file that may have been loaded. Used for logging.
        :return: A constructed and validated object.
        """
        # Data that passes schema validation is converted directly into the config object.
        if jb_conf_utils.validate_schema(data, ReportConfig.SCHEMA_NAME):
            return ReportConfig.from_dict(data)

        # Validation failed, so report the problem and terminate.
        logger.error(f"Validation errors in ReportConfig from {file_path}. See log. Exiting.")
        sys.exit(-1)

    @staticmethod
    def load(data_path: str):
        """
        Read the config file at the provided path, then validate it and construct the config.
        :param data_path: Path to config.
        :return: Loaded, validated, and constructed object.
        """
        logger.info(f"Loading REPORT CONFIG from {data_path}")
        raw_config = jb_fs.load_file(data_path)

        # Validation and construction are delegated to construct().
        return ReportConfig.construct(raw_config, data_path)
73 |
--------------------------------------------------------------------------------
/test/metrics/classification/test_classification_metrics.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import json
26 | from pathlib import Path
27 | from typing import List
28 |
29 | import numpy
30 | import pytest
31 | import torch
32 |
33 | import juneberry.metrics.classification.metrics_manager as mm
34 | from juneberry.config.model import Plugin
35 |
# Locate and parse the JSON config that lists the metrics plugins under test.
test_data_dir = Path(__file__).resolve().parent / "data"
config_filename = test_data_dir / "config_classification.json"

with open(config_filename, 'r') as f:
    config_data = json.load(f)

# Build one Plugin per entry in the "metrics" section of the config.
metrics_plugins: List[Plugin] = [Plugin.from_dict(cd) for cd in config_data["metrics"]]

# Fixed targets and predictions shared by every test in this module.
target = torch.tensor([0, 1, 2])
preds = torch.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]])

# metrics plugins take numpy inputs
with torch.set_grad_enabled(False):
    preds_np = preds.cpu().numpy()
    target_np = target.cpu().detach().numpy()

# Evaluate every configured metric once; the tests below inspect the results.
metrics_mgr = mm.MetricsManager(metrics_plugins)
metrics = metrics_mgr(target_np, preds_np, binary=False)
57 |
def approx(expected_val):
    """Wrap the expected value in a pytest.approx that uses an absolute tolerance of 5e-3."""
    absolute_tolerance = 5e-3
    return pytest.approx(expected_val, abs=absolute_tolerance)
60 |
def test_torchmetrics_functional():
    """Check the 'func_accuracy' entry of the computed metrics."""
    expected = numpy.array(0.6666667, dtype=numpy.float32)
    assert numpy.equal(metrics["func_accuracy"], expected)
63 |
def test_torchmetrics_classbased():
    """Check the 'obj_accuracy' entry of the computed metrics."""
    expected = numpy.array(0.6666667, dtype=numpy.float32)
    assert numpy.equal(metrics["obj_accuracy"], expected)
66 |
def test_torchnn():
    """Check the 'loss' entry of the computed metrics."""
    expected = numpy.array(1.3038288, dtype=numpy.float32)
    assert numpy.equal(metrics["loss"], expected)
69 |
def test_sklearn_metrics():
    """Check the 'accuracy_score' entry of the computed metrics."""
    score = metrics["accuracy_score"]
    assert score == 0
72 |
def test_tensorflow_classbased():
    """Check the 'tf_accuracy' entry of the computed metrics."""
    value = metrics["tf_accuracy"]
    assert value == 3.0
75 |
def test_tensorflow_functional():
    """Check the 'tf_binary_accuracy' entry of the computed metrics, within the approx() tolerance."""
    value = metrics["tf_binary_accuracy"]
    assert value == approx(0.33333334)
78 |
--------------------------------------------------------------------------------
/docker/databricks/monit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | ### BEGIN INIT INFO
4 | # Provides: monit
5 | # Required-Start: $remote_fs
6 | # Required-Stop: $remote_fs
7 | # Should-Start: $all
8 | # Should-Stop: $all
9 | # Default-Start: 2 3 4 5
10 | # Default-Stop: 0 1 6
11 | # Short-Description: service and resource monitoring daemon
12 | # Description: monit is a utility for managing and monitoring
13 | # processes, programs, files, directories and filesystems
14 | # on a Unix system. Monit conducts automatic maintenance
15 | # and repair and can execute meaningful causal actions
16 | # in error situations.
17 | ### END INIT INFO
18 |
19 | set -e
20 |
21 | . /lib/lsb/init-functions
22 |
23 | DAEMON=/usr/bin/monit
24 | CONFIG=/etc/monit/monitrc
25 | NAME=monit
26 | DESC="daemon monitor"
27 | MONIT_OPTS=
28 | PID="/run/$NAME.pid"
29 |
30 | # Check if DAEMON binary exist
31 | [ -f $DAEMON ] || exit 0
32 |
33 | [ -f "/etc/default/$NAME" ] && . /etc/default/$NAME
34 |
35 | MONIT_OPTS="-c $CONFIG $MONIT_OPTS"
36 |
# Print configuration instructions (unless the requested action is "stop") and
# exit the script with status 0.
#   $1 - the action the init script was invoked with.
monit_not_configured () {
    if [ "$1" != "stop" ]
    then
        # $NAME is passed as an argument instead of being embedded in the format
        # string, so a '%' or backslash in it cannot be interpreted by printf.
        printf '\tplease configure %s and then edit /etc/default/%s\n' "$NAME" "$NAME"
        printf '\tand set the "START" variable to "yes" in order to allow\n'
        printf '\t%s to start\n' "$NAME"
    fi
    exit 0
}
46 |
# Continue only when START is "yes"; otherwise hand over to monit_not_configured,
# which exits the script.
#   $1 - the action the init script was invoked with.
monit_checks () {
    [ "$START" = "yes" ] || monit_not_configured $1
}
54 |
# Dispatch on the action passed as the first argument to the script.
case "$1" in
    start)
        log_daemon_msg "Starting $DESC" "$NAME"
        # Exits the script (status 0) unless START is set to "yes".
        monit_checks $1
        if start-stop-daemon --start --quiet --oknodo --pidfile $PID --exec $DAEMON -- $MONIT_OPTS 1>/dev/null
        then
            log_end_msg 0
        else
            log_end_msg 1
        fi
        ;;
    stop)
        log_daemon_msg "Stopping $DESC" "$NAME"
        # Stop using the TERM/5/KILL/5 retry schedule.
        if start-stop-daemon --retry TERM/5/KILL/5 --oknodo --stop --quiet --pidfile $PID 1>/dev/null
        then
            log_end_msg 0
        else
            log_end_msg 1
        fi
        ;;
    reload)
        log_daemon_msg "Reloading $DESC configuration" "$NAME"
        # Send HUP to the running daemon rather than stopping it.
        if start-stop-daemon --stop --signal HUP --quiet --oknodo --pidfile $PID --exec $DAEMON -- $MONIT_OPTS 1>/dev/null
        then
            log_end_msg 0
        else
            log_end_msg 1
        fi
        ;;
    restart|force-reload)
        log_daemon_msg "Restarting $DESC" "$NAME"
        # Stop first, then start again; only the result of the start is reported.
        start-stop-daemon --retry TERM/5/KILL/5 --oknodo --stop --quiet --pidfile $PID 1>/dev/null
        if start-stop-daemon --start --quiet --oknodo --pidfile $PID --exec $DAEMON -- $MONIT_OPTS 1>/dev/null
        then
            log_end_msg 0
        else
            log_end_msg 1
        fi
        ;;
    syntax)
        # Run the daemon with -t against the configured options.
        $DAEMON $MONIT_OPTS -t
        ;;
    status)
        status_of_proc -p $PID $DAEMON $NAME
        ;;
    *)
        # Any other action: print the usage message.
        log_action_msg "Usage: /etc/init.d/$NAME {start|stop|reload|restart|force-reload|syntax|status}"
        ;;
esac

exit 0
106 |
--------------------------------------------------------------------------------
/juneberry/pytorch/tabular_dataset.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import numpy as np
26 |
27 | from juneberry.pytorch.utils import EpochDataset
28 |
29 |
class TabularDataset(EpochDataset):
    """
    An in-memory data set of numeric rows paired with labels.
    Every row is converted to a list of floats once, at construction time. On access the optional
    transforms are applied to a copy of the stored row and the row is returned as a float32 numpy array.
    """

    def __init__(self, rows_labels, transforms=None):
        """
        Initialize the tabular data set.
        :param rows_labels: An iterable of (row, label) pairs, where each row is an iterable of
        values convertible to float.
        :param transforms: Optional callable invoked as transforms(row, label=..., index=..., epoch=...)
        that returns a (row, label) pair. Applied on every access.
        :raises ValueError: If an entry is not a pair, or a row value cannot be converted to float.
        """
        super().__init__()

        self.transforms = transforms

        # Validate and convert in one pass. This keeps one-shot iterables (e.g. generators) usable and,
        # unlike an assert, the pair check still runs when Python is started with -O.
        converted = []
        for position, item in enumerate(rows_labels):
            if len(item) != 2:
                raise ValueError(f"Entry {position} of rows_labels must be a (row, label) pair, "
                                 f"but has {len(item)} elements.")
            row, label = item
            converted.append([[float(x) for x in row], label])

        # Pre-processed rows, ready for transformation.
        self.rows_labels = converted

    def __len__(self):
        """ :return: Total number of samples. """
        return len(self.rows_labels)

    def __getitem__(self, index):
        """
        Return one item.
        :param index: The index within the data set.
        :return: A (row, label) pair where row is a float32 numpy array.
        """
        row, label = self.rows_labels[index]

        if self.transforms is not None:
            # Work on a copy so in-place transforms never alter the stored row.
            row = row.copy()
            # NOTE(review): self.epoch is not set in this class; presumably provided by EpochDataset - confirm.
            args = {'label': label, 'index': index, 'epoch': self.epoch}
            row, label = self.transforms(row, **args)

        # They want a row as float
        row = np.array(row).astype(np.float32)

        return row, label
76 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Juneberry - Release 0.5
2 |
3 | Copyright 2022 Carnegie Mellon University.
4 |
5 | BSD (SEI)
6 |
7 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
8 | following conditions are met:
9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
10 | disclaimer.
11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
12 | following disclaimer in the documentation and/or other materials provided with the distribution.
13 | 3. Products derived from this software may not include “Carnegie Mellon University,” "SEI” and/or “Software
14 | Engineering Institute" in the name of such derived product, nor shall “Carnegie Mellon University,” "SEI”
15 | and/or “Software Engineering Institute" be used to endorse or promote products derived from this software
16 | without prior written permission. For written permission, please contact permission@sei.cmu.edu.
17 |
18 | ACKNOWLEDGMENTS AND DISCLAIMERS:
19 | Juneberry - Release 0.5 includes and/or can make use of certain third party software ("Third Party Software"). The
20 | Third Party Software that is used by Juneberry - Release 0.5 is dependent upon your system configuration, but
21 | typically includes the software identified in the documentation and/or ReadMe files. By using Juneberry - Release 0.5,
22 | you agree to comply with any and all relevant Third Party Software terms and conditions contained in any such Third
23 | Party Software or separate license file distributed with such Third Party Software. The parties who own the Third Party
24 | Software ("Third Party Licensors") are intended third party beneficiaries to this License with respect to the terms
25 | applicable to their Third Party Software. Third Party Software licenses only apply to the Third Party Software and not
26 | any other portion of Juneberry - Release 0.5 or Juneberry - Release 0.5 as a whole.
27 |
28 | This material is based upon work funded and supported by the Department of Defense under Contract No. FA8702-15-D-0002
29 | with Carnegie Mellon University for the operation of the Software Engineering Institute, a federally funded research
30 | and development center.
31 |
32 | The view, opinions, and/or findings contained in this material are those of the author(s) and should not be construed
33 | as an official Government position, policy, or decision, unless designated by other documentation.
34 |
35 | NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
36 | BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
37 | INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
38 | FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
39 | FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
40 |
41 | [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
42 | Copyright notice for non-US Government use and distribution.
43 |
44 | DM22-0856
--------------------------------------------------------------------------------
/juneberry/metrics/classification/torchmetrics/metrics.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | This module provides a torchmetrics classification metric plugin to be used with Juneberry.
27 | """
28 | import inspect
29 | import logging
30 | from typing import Dict
31 |
32 | import torch
33 |
34 | from juneberry.loader import construct_instance, load_verify_fqn_function
35 | from juneberry.metrics.classification.metrics import MetricsBase
36 |
37 | logger = logging.getLogger(__name__)
38 |
39 |
class Metrics(MetricsBase):
    """
    Juneberry metrics plugin that evaluates the torchmetrics metric identified by a fully qualified name.
    """

    def __init__(self,
                 fqn: str,
                 name: str,
                 kwargs: Dict = None) -> None:
        """
        :param fqn: Fully qualified name of the torchmetrics function or class.
        :param name: Name given to this metric.
        :param kwargs: Keyword arguments forwarded to the metric.
        """
        super().__init__(fqn, name, kwargs)

    def __call__(self, target, preds, binary):
        """
        Compute the metric.
        :param target: Target values; converted to a LongTensor.
        :param preds: Predictions; converted to a FloatTensor.
        :param binary: Not used by this implementation.
        :return: The metric result converted with .numpy().
        :raises ValueError: If the FQN resolves to neither a function nor a class instance.
        """
        target, preds = torch.LongTensor(target), torch.FloatTensor(preds)

        # Torchmetrics offers both functional and class-based metrics: try the functional form first
        # and fall back to constructing a class instance.
        probe_kwargs = {"preds": [], "target": [], **self.kwargs}
        metric = load_verify_fqn_function(self.fqn, probe_kwargs) or construct_instance(self.fqn, self.kwargs)

        # Neither form could be created.
        if not metric:
            log_msg = f"Unable to create metrics function: fqn={self.fqn}, name={self.name}, kwargs={self.kwargs}."
            logger.error(log_msg)
            raise ValueError(log_msg)

        # A functional metric takes the kwargs at call time; an instance received them at construction.
        if inspect.isfunction(metric):
            return metric(preds, target, **self.kwargs).numpy()
        return metric(preds, target).numpy()
69 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import setuptools
26 |
# Optional dependency groups, keyed by extra name.
extras = {
    'tf': ['tensorflow', 'tensorflow-datasets'],
    'torch': ['torch', 'torchvision', "torch-summary>=1.4.5", "torchmetrics"],
    'onnx': ['protobuf==3.16.0', 'onnx', 'onnxruntime', 'tf2onnx'],
    'onnx-gpu': ['protobuf==3.16.0', 'onnx', 'onnxruntime-gpu', 'tf2onnx'],
    'opacus': ['opacus'],
}

# Aggregate groups: the two differ only in which ONNX flavour they pull in.
extras['all'] = [pkg for group in ('tf', 'torch', 'onnx', 'opacus') for pkg in extras[group]]
extras['all-gpu'] = [pkg for group in ('tf', 'torch', 'onnx-gpu', 'opacus') for pkg in extras[group]]
42 |
# Packages required by every Juneberry installation (each listed once).
install_requires = [
    "doit",
    "numpy",
    "pycocotools",
    "matplotlib",
    "pillow",
    "prodict",
    "hjson",
    "jsonschema",
    "scikit-learn",
    "tqdm",
    "tensorboard",
    "pandas",
    "brambox",
    "pyyaml",
    "natsort",
    "ray",
    "jsonpath-ng"
]
63 |
# Command line scripts to install; each one lives in the bin/ directory.
bin_scripts = [
    f'bin/{script_name}'
    for script_name in (
        'jb_attack_to_rules',
        'jb_clean_experiment_evals',
        'jb_evaluate',
        'jb_experiment_to_rules',
        'jb_generate_experiments',
        'jb_generate_watermark_eval',
        'jb_gpu_runner',
        'jb_process_dataset',
        'jb_report',
        'jb_rules_to_pydoit',
        'jb_run_experiment',
        'jb_run_plugin',
        'jb_train',
        'jb_tune',
    )
]
80 |
# Package metadata, collected in one mapping and handed to setuptools in a single call.
setup_options = {
    'name': 'Juneberry',
    'version': '0.5.1',
    'description': 'Juneberry Machine Learning Experiment Manager',
    'packages': setuptools.find_packages(),
    'install_requires': install_requires,
    'scripts': bin_scripts,
    'python_requires': '>=3.7',
    'include_package_data': True,
    'extras_require': extras,
}

setuptools.setup(**setup_options)
92 |
--------------------------------------------------------------------------------
/bin/jb_run_plugin:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | import logging
27 | import sys
28 |
29 | from juneberry.filesystem import load_json
30 | import juneberry.loader as jb_loader
31 | import juneberry.scripting.utils as jb_scripting
32 |
33 | logger = logging.getLogger("juneberry.jb_run_plugin")
34 |
35 |
def run_plugin(plugin: str):
    """
    Build the plugin described by a JSON file and then call it.
    :param plugin: Path to the JSON file holding the plugin information: a required 'fqcn' entry
    and an optional 'kwargs' entry.
    :return: Nothing.
    """
    plugin_config = load_json(plugin)

    # Without a class name there is nothing to construct.
    if 'fqcn' not in plugin_config:
        logger.error("Expected key 'fqcn' in target plugin JSON file was not found. Exiting.")
        sys.exit(-1)

    # A missing 'kwargs' entry is treated as "no keyword arguments".
    plugin_kwargs = plugin_config['kwargs'] if 'kwargs' in plugin_config else {}

    # Construct the instance and immediately call it (run the plugin).
    jb_loader.construct_instance(plugin_config['fqcn'], plugin_kwargs)()
58 |
59 |
def setup_args(parser) -> None:
    """
    Register the positional argument of this script on the parser.
    :param parser: The parser that receives the argument.
    """
    plugin_file_help = ('A JSON file containing the name of the class representing the desired '
                        'plugin to run, along with any kwargs to pass to the instance.')
    parser.add_argument('pluginFile', help=plugin_file_help)
67 |
68 |
def main():
    """
    Entry point: parse the command line, configure logging, and run the plugin described by the
    JSON file given on the command line.
    """
    # Setup and parse all arguments. A non-empty description makes --help say what the tool does.
    parser = argparse.ArgumentParser(description="Constructs the plugin described in a JSON file and runs it.")
    setup_args(parser)
    jb_scripting.setup_args(parser)
    args = parser.parse_args()

    # Set up logging.
    jb_scripting.setup_logging_for_script(args)

    # Run the plugin.
    run_plugin(args.pluginFile)

    logger.info("jb_run_plugin is done.")
83 |
84 |
85 | if __name__ == "__main__":
86 | main()
87 |
--------------------------------------------------------------------------------
/bin/jb_clean_experiment_evals:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | This script cleans all of the predictions files produced by jb_evaluate.
27 |
28 | """
29 |
30 | import argparse
31 | import logging
32 | import subprocess
33 |
34 | from juneberry.config.rule_list import RulesList
35 | from juneberry.filesystem import ExperimentManager
36 | import juneberry.scripting.utils as jb_scripting
37 |
38 | logger = logging.getLogger("juneberry.jb_clean_experiment_evals")
39 |
40 |
def setup_args(parser) -> None:
    """
    Register the positional argument of this script on the parser.
    :param parser: The parser that receives the argument.
    """
    experiment_help = ('Name of the experiment in the experiments directory whose eval '
                       'directories should be cleaned.')
    parser.add_argument("experimentName", help=experiment_help)
48 |
49 |
def main():
    """
    Entry point: run `doit ... clean <task>` for every rule of the experiment whose command
    starts with "jb_evaluate".
    """
    parser = argparse.ArgumentParser(description="Cleans the eval directories in an experiment.")
    setup_args(parser)
    jb_scripting.setup_args(parser)
    args = parser.parse_args()

    # Resolve the experiment's files and set up the workspace (and logging) for it.
    experiment_manager = ExperimentManager(args.experimentName)
    lab = jb_scripting.setup_workspace(
        args,
        log_file=experiment_manager.get_log_path(),
        log_prefix="<> ",
        banner_msg=f">>> Juneberry Experiment Eval Cleaner - {args.experimentName} <<<")
    workspace_root = lab.workspace()
    rules_file = experiment_manager.get_experiment_rules()
    dodo_file = experiment_manager.get_experiment_dodo(workflow="main")

    # Collect the ids of all evaluation rules across every workflow.
    rules = RulesList.load(rules_file)
    eval_task_ids = [rule.id
                     for workflow in rules.workflows
                     for rule in workflow.rules
                     if rule.command[0] == "jb_evaluate"]

    # Clean each evaluation task in turn.
    for task_id in eval_task_ids:
        subprocess.run(["doit", "-f", dodo_file, "--dir", workspace_root, "clean", str(task_id)])

    logger.info("jb_clean_experiment_evals is done.")
78 |
79 |
80 | if __name__ == "__main__":
81 | main()
82 |
--------------------------------------------------------------------------------
/test/test_coco_annotations.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import unittest
26 |
27 | from juneberry.config.coco_anno import CocoAnnotations
28 |
29 |
def make_basic_config():
    """
    Build a minimal COCO-style annotation structure with two categories, one image and one annotation.
    Every call returns freshly created objects, so tests may mutate the result freely.
    Based on https://blog.superannotate.com/coco-dataset-introduction/
    """
    categories = [
        {"id": 1, "name": "poodle", "supercategory": "dog"},
        {"id": 2, "name": "ragdoll", "supercategory": "cat"},
    ]

    image = {
        "id": 122214,
        "width": 640,
        "height": 640,
        "file_name": "84.jpg",
        "license": 1,
        "date_captured": "2021-07-19 17:49",
    }

    annotation = {
        "area": 600.4,
        "iscrowd": 1,
        "image_id": 122214,
        "bbox": [473.05, 395.45, 38.65, 28.92],
        "category_id": 1,
        "id": 934,
    }

    return {
        "info": {"year": 2021, "version": "1.2"},
        "licenses": [],
        "categories": categories,
        "images": [image],
        "annotations": [annotation],
    }
71 |
72 |
class TestCocoAnno(unittest.TestCase):
    """Tests for constructing CocoAnnotations from an in-memory config."""

    def test_config_basics(self):
        """A valid config keeps its number of images and annotations after construction."""
        config = make_basic_config()
        coco_anno = CocoAnnotations.construct(config)
        # unittest assertions are not stripped under -O and report both values on failure.
        self.assertEqual(len(config['images']), len(coco_anno['images']))
        self.assertEqual(len(config['annotations']), len(coco_anno['annotations']))

    def test_duplicate_images(self):
        """A repeated image id exits the program and logs an error naming the id."""
        config = make_basic_config()
        config['images'].append(config['images'][0])

        with self.assertRaises(SystemExit), self.assertLogs(level='ERROR') as log:
            CocoAnnotations.construct(config)

        # Checked after the with block: statements following the raising call inside it would never run.
        message = "Found duplicate image id: id= '122214'."
        self.assertIn(message, log.output[0])
88 |
--------------------------------------------------------------------------------
/juneberry/schemas/coco_anno_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "type": "object",
4 | "properties": {
5 | "info": {
6 | "type": "object",
7 | "properties": {
8 | "year": { "type": "integer"},
9 | "version": { "type": "string"},
10 | "description": { "type": "string"},
11 | "contributor": { "type": "string"},
12 | "url": { "type": "string"},
13 | "date_created": { "type": "string"}
14 | }
15 | },
16 | "licenses": {
17 | "type": "array",
18 | "items": {
19 | "type": "object",
20 | "properties": {
21 | "id": { "type": "integer"},
22 | "name": { "type": "string" },
23 | "url": { "type": "string"}
24 | },
25 | "required": [
26 | "id",
27 | "name",
28 | "url"
29 | ]
30 | }
31 | },
32 | "categories": {
33 | "type": "array",
34 | "items": {
35 | "type": "object",
36 | "properties": {
37 | "id": { "type": "integer" },
38 | "name": { "type": "string" },
39 | "supercategory": { "type": "string" }
40 | },
41 | "required": [
42 | "id",
43 | "name"
44 | ]
45 | }
46 | },
47 | "images": {
48 | "type": "array",
49 | "items": {
50 | "type": "object",
51 | "properties": {
52 | "id": { "type": "integer" },
53 | "width": { "type": "integer" },
54 | "height": { "type": "integer" },
55 | "file_name": { "type": "string" },
56 | "license": { "type": "integer" },
57 | "flickr_url": { "type": "string" },
58 | "coco_url": { "type": "string" },
59 | "date_captured": { "type": "string" }
60 | },
61 | "required": [
62 | "id",
63 | "width",
64 | "height",
65 | "file_name"
66 | ]
67 | }
68 | },
69 | "annotations": {
70 | "type": "array",
71 | "items": {
72 | "type": "object",
73 | "properties": {
74 | "id": { "type": "integer" },
75 | "image_id": { "type": "integer" },
76 | "category_id": { "type": "integer" },
77 | "segmentation": {},
78 | "area": { "type": "number" },
79 | "bbox": {
80 | "type": "array",
81 | "items": { "type": "number" }
82 | },
83 | "iscrowd": {"type": "integer"},
84 | "score": { "type": "number" }
85 | },
86 | "required": [
87 | "id",
88 | "image_id",
89 | "category_id"
90 | ]
91 | }
92 | }
93 | },
94 | "required": [
95 | "categories",
96 | "images"
97 | ]
98 | }
99 |
--------------------------------------------------------------------------------
/scripts/reformat_predictions.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | import json
27 | from pathlib import Path
28 | import sys
29 |
30 |
def reformat_data(manifest, pred):
    """
    Merge an evaluation manifest and a predictions structure into per-item prediction records.
    :param manifest: Sequence of dicts with 'path' and 'label' keys, in the same order as the predictions.
    :param pred: Dict whose 'results' entry holds the parallel lists 'labels' and 'predictions'.
    :return: A new dict like pred, except that results no longer has 'labels' and results['predictions']
    is a list of {"path", "label", "predictions"} records. The pred argument itself is not modified.
    :raises ValueError: If a manifest label differs from the prediction label at the same position.
    """
    results = pred['results']
    pred_labels = results['labels']
    pred_preds = results['predictions']

    # The manifest and predictions are in the same order, so walk them by position.
    new_pred = []
    for idx, item in enumerate(manifest):
        # Double check the label. An explicit raise (not an assert) so the check survives python -O.
        if item['label'] != pred_labels[idx]:
            raise ValueError(f"Label mismatch at index {idx}: manifest has {item['label']!r}, "
                             f"predictions have {pred_labels[idx]!r}.")

        new_pred.append({
            "path": item['path'],
            "label": item['label'],
            "predictions": pred_preds[idx]
        })

    # Build a separate 'results' dict. pred.copy() is shallow, so editing new_out['results'] in place
    # would also change the caller's structure.
    new_results = {key: value for key, value in results.items() if key != 'labels'}
    new_results['predictions'] = new_pred

    new_out = pred.copy()
    new_out['results'] = new_results

    return new_out
56 |
57 |
def reformat_file(eval_dir: str):
    """
    Read eval_manifest.json and predictions.json from a directory and write the reformatted
    predictions to predictions_v2.json in the same directory.
    :param eval_dir: Path to the directory holding the manifest and predictions files.
    :return: None. Exits with status 1 if either input file is missing.
    """
    eval_path = Path(eval_dir)
    manifest_path = eval_path / "eval_manifest.json"
    pred_path = eval_path / "predictions.json"
    out_path = eval_path / "predictions_v2.json"

    # Both inputs are required; the manifest is checked first.
    for required_path in (manifest_path, pred_path):
        if not required_path.exists():
            print(f"Missing '{required_path}' file. Exiting.")
            # Non-zero status so calling shells and pipelines can detect the failure.
            sys.exit(1)

    with open(pred_path) as pred_file:
        pred_data = json.load(pred_file)

    with open(manifest_path) as manifest_file:
        manifest_data = json.load(manifest_file)

    out_data = reformat_data(manifest_data, pred_data)

    with open(out_path, "w") as out_file:
        json.dump(out_data, out_file, indent=4)
81 |
82 |
def main():
    """Entry point: read the evaluation directory from the command line and reformat its predictions."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("eval_dir", help="Path to directory with predictions and manifest.")
    reformat_file(arg_parser.parse_args().eval_dir)
88 |
89 |
90 | if __name__ == "__main__":
91 | main()
92 |
--------------------------------------------------------------------------------
/juneberry/config/hashes.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 | import sys
27 |
28 | from prodict import Prodict
29 |
30 | import juneberry.config.util as jb_conf_utils
31 | import juneberry.filesystem as jb_fs
32 |
33 | logger = logging.getLogger(__name__)
34 |
35 |
class Hashes(Prodict):
    """Config object for hashes data, validated against hashes_schema.json."""

    FORMAT_VERSION = '0.3.0'
    SCHEMA_NAME = 'hashes_schema.json'

    # NOTE(review): presumably a hash string of the model architecture - confirm against the schema.
    model_architecture: str

    @staticmethod
    def construct(data: dict, file_path: str = None):
        """
        Check the data against the schema and build a Hashes object from it.
        :param data: The data to use to construct the object.
        :param file_path: Optional path the data was loaded from. Only used in the error message.
        :return: A constructed object. Exits the program if validation fails.
        """
        if jb_conf_utils.validate_schema(data, Hashes.SCHEMA_NAME):
            return Hashes.from_dict(data)

        # Validation failed; the details were reported by the validator.
        logger.error(f"Validation errors in Hashes object from {file_path}. See log. Exiting!")
        sys.exit(-1)

    @staticmethod
    def load(data_path: str):
        """
        Read the file at the provided path, then validate and construct the object.
        :param data_path: Path to config.
        :return: Loaded, validated, and constructed object.
        """
        logger.info(f"Loading HASHES CONFIG from {data_path}")
        return Hashes.construct(jb_fs.load_file(data_path), data_path)

    def to_json(self):
        """ :return: A pure dictionary version suitable for serialization to json."""
        return jb_conf_utils.prodict_to_dict(self)

    def save(self, data_path: str) -> None:
        """
        Validate this object against the schema and save it as JSON.
        :param data_path: The path to the resource.
        :return: None
        """
        as_plain_dict = self.to_json()
        jb_conf_utils.validate_and_save_json(as_plain_dict, data_path, Hashes.SCHEMA_NAME)
84 |
--------------------------------------------------------------------------------
/scripts/draw_boxes_from_anno_file.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | import logging
27 | from pathlib import Path
28 | import sys
29 |
30 | import juneberry.config.coco_utils as coco_utils
31 | import juneberry.scripting.utils as jb_scripting
32 |
33 | logger = logging.getLogger("juneberry.scripts.draw_boxes_from_anno_file")
34 |
35 |
def setup_args(parser) -> None:
    """
    Registers this script's command line arguments on the provided parser.
    :param parser: The argument parser that receives the new arguments.
    """
    annotations_help = ("COCO annotations file describing both the raw images, and the bounding boxes around "
                        "the objects that were detected in each image. ")
    output_dir_help = ('An optional output directory where the image results will be saved. When this argument '
                       'is not provided, the images will be saved to the current working directory in a '
                       'directory named "boxed_imgs".')

    # One required positional argument followed by one optional flag.
    parser.add_argument('annotationsFile', help=annotations_help)
    parser.add_argument('-o', '--outputDir', help=output_dir_help)
48 |
49 |
def main():
    """
    Script entry point: parses the arguments, sets up the workspace, and draws the annotated
    bounding boxes onto the images. Exits with status -1 when the annotations file does not exist.
    """
    # Build the parser with both the script-specific and the common Juneberry arguments.
    description = ("This script takes a COCO annotations file and produces a directory "
                   "of images with bounding boxes drawn around the objects "
                   "described in the annotations.")
    parser = argparse.ArgumentParser(description=description)
    setup_args(parser)
    jb_scripting.setup_args(parser)
    args = parser.parse_args()

    # The lab is needed by the image generation step below.
    lab = jb_scripting.setup_workspace(args, log_file=None)

    anno_file = Path(args.annotationsFile)
    if anno_file.exists():
        # Draw the boxes and store the resulting images in the output directory.
        coco_utils.generate_bbox_images(anno_file, lab, args.outputDir)
        logger.info("Done.")
    else:
        # Nothing can be drawn without the annotations, so report the problem and stop.
        logger.error(f"The annotations file {anno_file} was not found. EXITING.")
        sys.exit(-1)
72 |
73 |
74 | if __name__ == "__main__":
75 | main()
76 |
--------------------------------------------------------------------------------
/scripts/coco_image_use.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | import json
27 | import logging
28 | from pathlib import Path
29 |
30 | import juneberry.config.coco_utils as coco_utils
31 | import juneberry.scripting.utils as jb_scripting
32 |
33 | logger = logging.getLogger("juneberry.coco_image_use.py")
34 |
35 |
def setup_args(parser):
    """
    Registers this script's command line arguments on the provided parser.
    :param parser: The argument parser that receives the new arguments.
    """
    # The two required positional arguments, in the order they must appear on the command line.
    positional_args = (
        ("model", "Model to search through."),
        ("file_name", "Image filename to search for."),
    )
    for arg_name, arg_help in positional_args:
        parser.add_argument(arg_name, help=arg_help)

    # Optional switch that extends the search to the eval directories.
    parser.add_argument("-e", "--evals", action='store_true', default=False,
                        help="Also scan all eval directories.")
40 |
41 |
def show_uses(coco_path, file_name):
    """
    Logs the first image entry in a COCO file that matches the given filename.
    An entry matches when the name equals either the entry's full file_name or just its final path component.
    :param coco_path: Path to the COCO annotations file to search.
    :param file_name: The image filename to look for.
    :return: None
    """
    logger.info(f"Searching {coco_path} for {file_name}...")

    # The flattened image list lets us find the file with a single linear scan.
    image_entries = coco_utils.load_from_json_file(coco_path).to_image_list()

    for image_entry in image_entries:
        entry_path = Path(image_entry.file_name)
        if file_name == image_entry.file_name or file_name == entry_path.name:
            # Only the first match is reported.
            logger.info(json.dumps(image_entry, indent=4))
            break
    else:
        # The loop finished without hitting a match.
        logger.info(f" {file_name} was not found in {coco_path}")
56 |
57 |
def main():
    """
    Script entry point: searches the training and validation manifests of a model (and optionally
    the manifests of all its eval directories) for uses of the requested image.
    """
    description = ("This script searches the specified model for "
                   "uses of the specified image.")
    parser = argparse.ArgumentParser(description=description)
    jb_scripting.setup_args(parser)
    setup_args(parser)
    args = parser.parse_args()

    # The lab provides access to the model manager for the requested model.
    lab = jb_scripting.setup_for_single_model(args, log_file=None, model_name=args.model)
    model_manager = lab.model_manager(args.model)
    target_image = args.file_name

    # Training manifest first, then the validation manifest.
    manifest_getters = (
        model_manager.get_training_data_manifest_path,
        model_manager.get_validation_data_manifest_path,
    )
    for get_manifest_path in manifest_getters:
        show_uses(get_manifest_path(), target_image)

    # The eval directories are only scanned on request.
    if not args.evals:
        return

    logger.info("Scanning eval dirs.")
    for eval_dir in model_manager.iter_eval_dirs():
        show_uses(eval_dir.get_manifest_path(), target_image)
77 |
78 |
79 | if __name__ == "__main__":
80 | main()
81 |
--------------------------------------------------------------------------------
/juneberry/schemas/evaluation_output_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "type": "object",
4 | "properties": {
5 | "format_version": { "type": "string" },
6 | "options": {
7 | "type": "object",
8 | "properties": {
9 | "dataset": {
10 | "type": "object",
11 | "properties": {
12 | "classes": { "type": "object" },
13 | "config": { "type": "string" },
14 | "histogram": { "type": "object" }
15 | },
16 | "required": [ "config" ]
17 | },
18 | "model": {
19 | "type": "object",
20 | "properties": {
21 | "hash": { "type": "string" },
22 | "name": { "type": "string" },
23 | "num_classes": { "type": "number" }
24 | },
25 | "required": [ "name" ]
26 | }
27 | },
28 | "required": [ "dataset", "model" ]
29 | },
30 | "results": {
31 | "type": "object",
32 | "properties": {
33 | "classifications": {
34 | "type": "array",
35 | "items": {
36 | "type": "object",
37 | "properties": {
38 | "file": { "type": "string" },
39 | "actual_label": { "type": "number" },
40 | "actual_label_name": { "type": "string" },
41 | "predicted_classes": {
42 | "type": "array",
43 | "items": {
44 | "type": "object",
45 | "properties": {
46 | "label": { "type": "number" },
47 | "label_name": { "type": "string" },
48 | "confidence": { "type": "number" }
49 | }
50 | }
51 | }
52 | }
53 | }
54 | },
55 | "labels": {
56 | "type": "array",
57 | "items": { "type": "number" }
58 | },
59 | "metrics": {
60 | "type": "object",
61 | "properties": {
62 | "classification": { "type": "object" },
63 | "bbox": { "type": "object" },
64 | "bbox_per_class": { "type": "object" }
65 | },
66 | "required": [ ]
67 | },
68 | "predictions": {
69 | "type": "array",
70 | "items": {
71 | "type": "array",
72 | "items": { "type": "number" }
73 | }
74 | }
75 | },
76 | "required": [ ]
77 | },
78 | "times": {
79 | "type": "object",
80 | "properties": {
81 | "duration": { "type": "number" },
82 | "end_time": { "type": "string" },
83 | "start_time": { "type": "string" }
84 | },
85 | "required": [ ]
86 | }
87 | },
88 | "required": [
89 | "options",
90 | "results"
91 | ]
92 | }
93 |
--------------------------------------------------------------------------------
/juneberry/transforms/image.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | """
26 | A set of general image conversions.
27 | """
28 |
29 | from PIL import Image
30 |
31 | import juneberry.image as jb_img_utils
32 |
33 |
class ConvertMode:
    """
    Transform that makes sure an image is in the configured mode.
    "kwargs": { "mode": 'RGB' }
    """

    def __init__(self, mode):
        self.mode = mode

    def __call__(self, image):
        # An image that already has the requested mode is handed back untouched.
        needs_conversion = image.mode != self.mode
        return image.convert(self.mode) if needs_conversion else image
48 |
49 |
class ResizePad:
    """
    Resizes the image maintaining aspect ratio, padding with the specified color if necessary.

    NOTE: The actual resizing is delegated to juneberry.image.resize_image, which was documented
    here as using Image.ANTIALIAS resampling.

    "kwargs": { "width": 224, "height": 224, "pad_color": [ 0,0,0 ] }

    For compatibility with configs written against the earlier documentation, "color" is accepted
    as an alias of "pad_color".
    """

    def __init__(self, width, height, pad_color=(0, 0, 0), color=None):
        """
        :param width: Target width handed to resize_image.
        :param height: Target height handed to resize_image.
        :param pad_color: Color used for padding.
        :param color: Optional alias for pad_color; when provided (not None) it takes precedence.
        """
        self.width = width
        self.height = height
        # The docstring historically advertised "color" although only "pad_color" was accepted,
        # so both spellings are honored here.
        self.color = pad_color if color is None else color

    def __call__(self, image):
        return jb_img_utils.resize_image(image, self.width, self.height, self.color)
66 |
67 |
class ChangeAllLabelsTo:
    """
    Transform that replaces the label accompanying an image with one fixed label.
    """

    def __init__(self, label):
        self.label = label

    def __call__(self, image, label):
        # The incoming label is discarded; the image is passed through unchanged.
        relabeled = (image, self.label)
        return relabeled
74 |
75 |
class Watermark:
    """
    Inserts a transformed copy of a watermark image at a random position in the input image.
    """

    def __init__(self, watermark_path, min_scale=1.0, max_scale=1.0, rotation=0, blur=0):
        """
        :param watermark_path: Path to the watermark image file.
        :param min_scale: Lower bound of the scale range passed to transform_image.
        :param max_scale: Upper bound of the scale range passed to transform_image.
        :param rotation: Rotation value passed to transform_image.
        :param blur: Blur value passed to transform_image.
        """
        # NOTE: Image.open() is lazy and keeps the file open. copy() forces the pixel data to be
        # loaded, and the context manager closes the underlying file instead of leaving that to
        # garbage collection.
        with Image.open(watermark_path) as source_image:
            self.watermark = source_image.copy()
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.rotation = rotation
        self.blur = blur

    def __call__(self, image):
        """
        :param image: The image to insert the watermark into.
        :return: The image returned by insert_watermark_at_position.
        """
        # Copy the watermark so we can munge it without touching the stored original.
        tmp_img: Image.Image = self.watermark.copy()

        # Transform watermark
        tmp_img = jb_img_utils.transform_image(tmp_img, (self.min_scale, self.max_scale), self.rotation, self.blur)

        # Insert at a random location
        x, y = jb_img_utils.make_random_insert_position(tmp_img.size, image.size)
        image = jb_img_utils.insert_watermark_at_position(image, tmp_img, (x, y))

        return image
97 |
--------------------------------------------------------------------------------
/juneberry/tensorboard.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | from torch.utils.tensorboard import SummaryWriter
26 |
27 |
class TensorBoardManager:
    """
    Responsible for logging data for TensorBoard.
    """

    def __init__(self, tb_root, model_manager):
        """
        Creates the summary writer and registers the custom scalars layout.
        :param tb_root: The root directory for TensorBoard output.
        :param model_manager: Provides create_tensorboard_directory_name(), which yields the log directory.
        """
        self.tensorboard_root = tb_root
        self.log_dir = model_manager.create_tensorboard_directory_name(tb_root)
        self.summary_writer = SummaryWriter(log_dir=self.log_dir)

        # The tags listed in the layout must match the tags written in update() exactly (including
        # case), otherwise the custom scalar charts have nothing to display.
        layout = {
            'Accuracy': {
                'accuracy': ['Multiline', ['Accuracy/combined', 'Accuracy/train', 'Accuracy/val']]
            },
            'Learning Rate': {
                'learning rate': ['Multiline', ['Learning Rate']]
            },
            'Loss': {
                'loss': ['Multiline', ['Loss/combined', 'Loss/train', 'Loss/val']]
            }
        }
        self.summary_writer.add_custom_scalars(layout)

    def update(self, history, epoch) -> None:
        """
        Write data to the tensorboard log.
        :param history: A data structure that tracks the training history. It is indexed by the keys
        'accuracy', 'val_accuracy', 'loss', 'val_loss', and 'lr', and each entry is indexed by epoch.
        :param epoch: An epoch number that can be used to look up a particular moment in the history.
        :return:
        """
        self.summary_writer.add_scalar('Accuracy/train', history['accuracy'][epoch], epoch)
        self.summary_writer.add_scalar('Accuracy/val', history['val_accuracy'][epoch], epoch)
        self.summary_writer.add_scalars('Accuracy/combined', {'train': history['accuracy'][epoch],
                                                              'val': history['val_accuracy'][epoch]}, epoch)
        self.summary_writer.add_scalar('Loss/train', history['loss'][epoch], epoch)
        self.summary_writer.add_scalar('Loss/val', history['val_loss'][epoch], epoch)
        self.summary_writer.add_scalars('Loss/combined', {'train': history['loss'][epoch],
                                                          'val': history['val_loss'][epoch]}, epoch)
        self.summary_writer.add_scalar('Learning Rate', history['lr'][epoch], epoch)

    def close(self) -> None:
        """
        Closes the summary writer.
        :return:
        """
        self.summary_writer.close()
74 |
--------------------------------------------------------------------------------
/scripts/model_transform.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import argparse
26 | import logging
27 | import sys
28 |
29 | import juneberry.pytorch.utils as pyt_utils
30 | from juneberry.transforms.transform_manager import TransformManager
31 | import juneberry.filesystem as jb_fs
32 |
33 | logger = logging.getLogger("juneberry.jb_model_transform")
34 |
35 |
def convert_model(model_architecture, model_transforms, num_model_classes):
    """
    Builds the model and runs the configured model transforms over it. Nothing is returned.
    :param model_architecture: The 'model_architecture' stanza used to construct the model.
    :param model_transforms: The 'model_transforms' stanza describing the transforms to apply.
    :param num_model_classes: Number of classes to construct the model with.
    """
    constructed_model = pyt_utils.construct_model(model_architecture, num_model_classes)

    # Build the transform chain and run it over the freshly constructed model.
    TransformManager(model_transforms).transform(constructed_model)
42 |
43 |
def main():
    """
    Command line entry point: loads a (partial) config file, then constructs the model and applies
    the model transforms. Exits with status -1 when the config lacks the 'model_architecture' or
    the 'model_transforms' stanza.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

    # Setup and parse all arguments.
    # NOTE: Adjacent string literals are joined without a separator, so every fragment carries
    # its own trailing space to keep the words of the help text apart.
    parser = argparse.ArgumentParser(description="Constructs a model, applies transforms, and exits. "
                                                 "The config must be a subset of the training config and it "
                                                 "must contain 'model_architecture' and 'model_transforms' "
                                                 "stanzas. For loading weights and saving, include appropriate "
                                                 "transforms in the 'model_transforms' stanza, as this has no inherent "
                                                 "output.")

    parser.add_argument("config_path", help="Path to the config file with 'model_architecture' and 'model_transforms'.")
    parser.add_argument("num_model_classes", type=int, help="Number of model classes to use on construction.")

    args = parser.parse_args()

    # NOTE: We do NOT use the ModelConfig loader, because we do not require a full config at this time.
    config = jb_fs.load_file(args.config_path)

    # Both stanzas are mandatory; report the first one that is missing and stop.
    for stanza in ('model_architecture', 'model_transforms'):
        if stanza not in config:
            logger.error(f"Config does not have stanza '{stanza}'. EXITING.")
            sys.exit(-1)

    convert_model(config['model_architecture'], config['model_transforms'], args.num_model_classes)
72 |
73 |
74 | if __name__ == "__main__":
75 | main()
76 |
--------------------------------------------------------------------------------
/docs/building_docker.md:
--------------------------------------------------------------------------------
1 | Building Juneberry Docker Containers
2 | ==========
3 |
4 | ***
5 |
6 | **WARNING: These containers and scripts create containers with NO SECURITY PRACTICES,
7 | such as separate user accounts, unprivileged users, etc.**
8 |
9 | **USE AT YOUR OWN RISK.**
10 |
11 | ***
12 |
13 |
14 | # Overview
15 |
16 | This directory contains **Dockerfile**s, scripts for building various images for use with Juneberry, and some
17 | convenience scripts for running images.
18 |
19 |
20 | # Dockerfiles
21 |
22 | ## cpudev.Dockerfile
23 |
24 | An image with full cpu development support. Checkpoints NOT included.
25 |
26 | ## cudadev.Dockerfile
27 |
28 | The image to be used for development on cuda platforms. Checkpoints NOT included.
29 |
30 | # Building
31 |
32 | To build a particular docker image, use normal docker build commands, or the convenience script `build.sh`.
33 | The build script takes one argument, which is the part before the period in the Dockerfile name.
34 | For example, to build the cudadev image use `./build.sh cudadev`.
35 |
36 | # Automatic command execution on start
37 |
38 | When the containers start up, they look for a script called "container_start.sh" in the /juneberry
39 | directory (that is, the directory mounted as /juneberry) and, if found, execute it. This is useful for
40 | automatically installing Juneberry (e.g. `pip install -e .`), running tests, or performing other setup.
41 |
42 | # Container layout
43 |
44 | The development process is based around the following lab layout:
45 |
46 | * /juneberry - Mount from the external user's directory
47 | * /datasets - Mount to the external data directories.
48 | * /tensorboard - Mount point for tensorboard output
49 | * /root/.cache/torch/hub - Mounted for model caches for PyTorch and MMDetection
50 | * /root/.torch - Mounted for model caches for Detectron2
51 |
52 | The containers set the dataroot and tensorboard environment variables automatically. The current working
53 | directory will be chosen as the workspace, unless specified otherwise.
54 |
55 | # Convenience Scripts
56 |
57 | In addition to the script for building images, there are also some convenience scripts here.
58 |
59 | ## enter_juneberry_container
60 |
61 | This script starts up a **temporary** 'cudadev' container on your host using all available gpus.
62 | It assumes a project directory structure that contains a set of special subdirectories where each
63 | subdirectory becomes a mount point within the container. This parent directory should be passed as the argument
64 | into enter_juneberry_container.
65 |
66 | The structure is:
67 |
68 | * juneberry <- This is the Juneberry repo that was pulled
69 | * datasets <- This is where the source data is located, i.e. the "dataroot" that Juneberry will look at.
70 | * tensorboard <- This is where the tensorboard outputs will be stored.
71 | * cache <- This is where the model downloads are cached.
72 |
73 | For example, if this structure was in the directory `~/proj` then to use the `enter_juneberry_container`
74 | change into `~/proj` and run:
75 |
76 | `./juneberry/docker/enter_juneberry_container .`
77 |
78 | See the comments within the script for how to configure it to use a cpu-only container, adjust environment
79 | variables, add other mount points and configure gpus.
80 |
81 | ## set_user.sh
82 |
83 | This optional convenience script can create a user inside the container to match an external
84 | user, resulting in the correct permissions for volumes mounted inside the container. See the script
85 | for an explanation of how it works in conjunction with enter_juneberry_container.
86 |
87 | # Copyright
88 |
89 | Copyright 2022 Carnegie Mellon University. See LICENSE.txt file for license terms.
90 |
--------------------------------------------------------------------------------
/juneberry/metrics/objectdetection/brambox/format.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 | from typing import Dict
27 |
28 | from juneberry.config.eval_output import Metrics
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 |
class DefaultFormatter:
    """
    Rearranges the Coco, Tide, and Summary brambox metrics into a Metrics object with the
    'bbox', 'bbox_per_class', and 'summary' sections, and returns it as a dictionary.
    """

    def __init__(self):
        pass

    def __call__(self, metrics: Dict):
        """
        :param metrics: Dictionary holding the results of the Coco, Tide, and Summary metrics,
        keyed by the fully qualified name of each metric.
        :return: The result of to_dict() on the populated Metrics object.
        """
        coco_metrics = metrics["juneberry.metrics.objectdetection.brambox.metrics.Coco"]
        tide_metrics = metrics["juneberry.metrics.objectdetection.brambox.metrics.Tide"]
        summary_metrics = metrics["juneberry.metrics.objectdetection.brambox.metrics.Summary"]

        result = Metrics()
        result.bbox = {}
        result.bbox_per_class = {}
        result.summary = {}

        # (output key, Coco key) pairs; several of the Coco values are stored under shorter names.
        coco_fields = (
            ("mAP", "mAP_coco"),
            ("mAP_50", "mAP_50"),
            ("mAP_75", "mAP_75"),
            ("mAP_s", "mAP_small"),
            ("mAP_m", "mAP_medium"),
            ("mAP_l", "mAP_large"),
        )
        for output_key, coco_key in coco_fields:
            result.bbox[output_key] = coco_metrics[coco_key]

        # The Tide values keep their names.
        tide_fields = (
            "mdAP_localisation",
            "mdAP_classification",
            "mdAP_both",
            "mdAP_duplicate",
            "mdAP_background",
            "mdAP_missed",
            "mdAP_fp",
            "mdAP_fn",
        )
        for tide_key in tide_fields:
            result.bbox[tide_key] = tide_metrics[tide_key]

        # Every Coco entry whose key does not start with "mAP" goes into the per-class section.
        for coco_key, coco_value in coco_metrics.items():
            if coco_key.startswith("mAP"):
                continue
            result.bbox_per_class["mAP_" + coco_key] = coco_value

        for summary_key in ("pr_auc", "pc_auc", "rc_auc", "max_r", "ap"):
            result.summary[summary_key] = summary_metrics[summary_key]

        # The prediction type counts are nested one level deeper in the summary metrics.
        for prediction_type in ("tp", "fp", "fn"):
            result.summary[prediction_type] = summary_metrics["prediction_types"][prediction_type]

        return result.to_dict()
77 |
--------------------------------------------------------------------------------
/juneberry/onnx/default.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | # ======================================================================================================================
4 | # Juneberry - Release 0.5
5 | #
6 | # Copyright 2022 Carnegie Mellon University.
7 | #
8 | # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS"
9 | # BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER
10 | # INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED
11 | # FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM
12 | # FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
13 | #
14 | # Released under a BSD (SEI)-style license, please see license.txt or contact permission@sei.cmu.edu for full terms.
15 | #
16 | # [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see
17 | # Copyright notice for non-US Government use and distribution.
18 | #
19 | # This Software includes and/or makes use of Third-Party Software each subject to its own license.
20 | #
21 | # DM22-0856
22 | #
23 | # ======================================================================================================================
24 |
25 | import logging
26 |
27 | import juneberry.evaluation.utils as jb_eval_utils
28 | import juneberry.filesystem as jb_fs
29 | from juneberry.onnx.evaluator import Evaluator
30 | from juneberry.onnx.utils import ONNXPlatformDefinitions
31 | import juneberry.pytorch.evaluation.utils as jb_pytorch_eval_utils
32 |
33 | logger = logging.getLogger(__name__)
34 |
35 |
class OnnxEvaluationOutput:
    """
    This is the default ONNX evaluation class used for formatting raw classification evaluation data
    in Juneberry.
    """

    def __call__(self, evaluator: Evaluator):
        """
        When called, this method uses the attributes of the evaluator to format the raw evaluation data,
        verifies the hash of the evaluated model, optionally adds the top K classifications, and then
        saves the predictions and the metrics to the paths provided by the evaluator's eval directory manager.
        :param evaluator: The Evaluator object managing the evaluation.
        :return: Nothing.
        """

        # Perform the common eval output processing steps for a classifier.
        jb_eval_utils.prepare_classification_eval_output(evaluator)

        # Calculate the hash of the model that was used to conduct the evaluation.
        model_path = evaluator.model_manager.get_model_path(ONNXPlatformDefinitions())
        evaluated_model_hash = jb_fs.generate_file_hash(model_path)

        # If Juneberry trained the model, a hash would have been calculated after training.
        # Compare that hash (if it exists) to the hash of the model being evaluated.
        jb_eval_utils.verify_model_hash(evaluator, evaluated_model_hash, onnx=True)

        # If requested, get the top K classes predicted for each input.
        if evaluator.top_k:
            jb_pytorch_eval_utils.top_k_classifications(evaluator, evaluator.eval_dataset_config.label_names)

        # Save the predictions portion of the evaluation output to the appropriate file.
        logger.info(f"Saving predictions to {evaluator.eval_dir_mgr.get_predictions_path()}")
        evaluator.output_builder.save_predictions(evaluator.eval_dir_mgr.get_predictions_path())

        # Save the metrics portion of the evaluation output to the appropriate file.
        logger.info(f"Saving metrics to {evaluator.eval_dir_mgr.get_metrics_path()}")
        evaluator.output_builder.save_metrics(evaluator.eval_dir_mgr.get_metrics_path())
73 |
--------------------------------------------------------------------------------
/juneberry/schemas/experiment_schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json-schema.org/draft-07/schema",
3 | "$id": "experiment_schema.json",
4 | "type": "object",
5 | "properties": {
6 | "description": { "type": "string" },
7 | "format_version": { "type": "string" },
8 | "filter": {
9 | "type": "array",
10 | "items": {
11 | "type": "object",
12 | "properties": {
13 | "tag": { "type": "string" },
14 | "cmd": {
15 | "type": "array",
16 | "items": { "type": "string" }
17 | },
18 | "inputs": {
19 | "type": "array",
20 | "items": { "type": "string" }
21 | }
22 | }
23 | }
24 | },
25 | "models": {
26 | "type": "array",
27 | "items": {
28 | "type": "object",
29 | "properties": {
30 | "filters": {
31 | "type": "array",
32 | "items": { "type": "string" }
33 | },
34 | "maximum_evaluations": { "type": "integer" },
35 | "name": { "type": "string" },
36 | "onnx": { "type": "boolean"},
37 | "tests": {
38 | "type": "array",
39 | "items": {
40 | "type": "object",
41 | "properties": {
42 | "classify": { "type": "integer" },
43 | "dataset_path": { "type": "string" },
44 | "filters": {
45 | "type": "array",
46 | "items": { "type": "string" }
47 | },
48 | "tag": { "type": "string" },
49 | "use_train_split": { "type": "boolean" },
50 | "use_val_split": { "type": "boolean" }
51 | },
52 | "required": [ "dataset_path", "tag" ]
53 | }
54 | },
55 | "train": { "type": "boolean" },
56 | "tuning": { "type": "string" },
57 | "version": { "type": "string" }
58 | },
59 | "required": [ "name", "tests" ]
60 | }
61 | },
62 | "reports": {
63 | "type": "array",
64 | "items": {
65 | "allOf": [{ "$ref": "report_schema.json#/$defs/report"}],
66 | "properties": {
67 | "classes": { "type": "string"},
68 | "tests": {
69 | "type": "array",
70 | "items": {
71 | "type": "object",
72 | "properties": {
73 | "tag": { "type": "string" },
74 | "classes": { "type": "string" }
75 | },
76 | "required": [ "tag" ]
77 | }
78 | }
79 | }
80 | }
81 | },
82 | "timestamp": { "type": "string" },
83 | "tuning": {
84 | "type": "array",
85 | "items": {
86 | "type": "object",
87 | "properties": {
88 | "model": { "type": "string" },
89 | "tuning_config": { "type": "string" }
90 | },
91 | "required": [ "model", "tuning_config" ]
92 | }
93 | }
94 | },
95 | "required": []
96 | }
97 |
--------------------------------------------------------------------------------