Hi there! This is the SendIt application for receiving, de-identifying, and sending DICOM images to storage. You must authenticate to get access to this application.
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | {% if form %}
26 |
33 | {% endif %}
34 |
35 |
36 |
37 | {% endblock %}
38 |
--------------------------------------------------------------------------------
/config.template:
--------------------------------------------------------------------------------
1 | #####################################################
2 | # RESTFUL API
3 | #####################################################
4 |
5 | @ANONYMIZE_RESTFUL=True
6 |
7 | # If True, scrub pixel data for images identified by header "Burned in Annotation" = "NO"
8 | # This is currently not supported
9 | @ANONYMIZE_PIXELS=False # currently not supported
10 |
11 | # The study to use
12 | @SOM_STUDY="test"
13 |
14 | # An additional specification for whitelisting, blacklisting, and greylisting data
15 | # If None, only the default (for burned pixel filtering) is used
16 | # Currently, these live with the deid software, eg:
17 | # https://github.com/pydicom/deid/blob/development/deid/data/deid.dicom.xray.chest
18 | # would be referenced with STUDY_DEID="dicom.xray.chest"
19 | @STUDY_DEID=""
20 |
21 | # Comma separated list of folders under sendit/1
22 | # EG, sendit/1/1_6 --> /data/1_6
23 | @DATA_INPUT_FOLDERS=""
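# For example (an illustrative value, not a default):
# @DATA_INPUT_FOLDERS="1_6,1_7"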
24 |
25 | #####################################################
26 | # STORAGE
27 | #####################################################
28 |
29 | # Orthanc Storage
30 | @SEND_TO_ORTHANC=False
31 | @ORTHANC_IPADDRESS="127.0.0.1"
32 | @ORTHANC_PORT=4747
33 |
34 | # Google Storage
35 | # Should we send to Google at all?
36 | @SEND_TO_GOOGLE=True
37 |
38 | # Google Cloud Storage Bucket (must be created)
39 | @GOOGLE_CLOUD_STORAGE='radiology'
40 | @GOOGLE_STORAGE_COLLECTION=""
41 | @GOOGLE_PROJECT_NAME=""
42 |
--------------------------------------------------------------------------------
/sendit/apps/base/views.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2017 Vanessa Sochat
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | SOFTWARE.
21 |
22 | '''
23 |
24 | from django.shortcuts import render
25 |
26 | # Error Pages #########################################
27 |
28 | def handler404(request):
29 | return render(request,'base/404.html')
30 |
31 | def handler500(request):
32 | return render(request,'base/500.html')
33 |
--------------------------------------------------------------------------------
/sendit/apps/api/templates/routes/api.html:
--------------------------------------------------------------------------------
1 | {% extends "base/base.html" %}
2 | {% load staticfiles %}
3 | {% block head %}
4 | {% endblock %}
5 | {% block content %}
6 |
7 |
8 |
9 |
10 | <h1>API</h1>
11 | <h2>Application Programming Interface</h2>
12 |
13 | <p>The REST API provides programmatic access to batch and image progress. This isn't currently used for anything, but might be useful at some point.</p>
14 |
52 |
53 |
--------------------------------------------------------------------------------
/docs/setup.md:
--------------------------------------------------------------------------------
1 | # Setup
2 |
3 | This document will review basic setup of the sendit application. You will need root (sudo) permissions on a server, and ideally the ability to serve a web application (via a Docker image). The application can run without the web interface, but the interface is a nice way to interact with the application and view what is going on.
4 |
5 |
6 | ## Download
7 | Before you start, make sure that you have Docker and docker-compose installed; a complete script for setting up these dependencies on a new instance [is provided](scripts/prepare_instance.sh). It basically installs docker and docker-compose, and downloads this repository to an install base.
8 |
9 | You should walk through this carefully to make sure everything completes. Importantly, after installing docker you will need to log out and back in for the changes to take effect. The last steps in the preparation are to clone the repo, and we recommend a location like `/opt`.
10 |
11 | ```
12 | cd /opt
13 | git clone https://www.github.com/pydicom/sendit
14 | cd sendit
15 | ```
16 |
17 | This means your application base will be located at `/opt/sendit`, and we recommend that your data folder (where your system process will add files) be maintained at `/opt/sendit/data`. You don't have to do this, but if you don't, you need to change the folder in the [docker-compose.yml](docker-compose.yml) to where you want it to be. For example, right now we map `data` in the application's directory to `/data` in the container, and it looks like this:
18 |
19 | ```
20 | uwsgi:
21 | restart: always
22 | image: pydicom/sendit
23 | volumes:
24 | - ./data:/data
25 | ```
26 |
27 | To change that to `/tmp/dcm`, you would change that line to:
28 |
29 | ```
30 | uwsgi:
31 | restart: always
32 | image: pydicom/sendit
33 | volumes:
34 | - /tmp/dcm:/data
35 | ```
36 |
37 | You should next [configure](config.md) your application before building the image.
38 |
--------------------------------------------------------------------------------
/sendit/apps/api/serializers.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2017 Vanessa Sochat
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | SOFTWARE.
21 |
22 | '''
23 |
24 | from django.contrib.auth.models import User
25 | from sendit.apps.main.models import (
26 | Batch,
27 | Image
28 | )
29 |
30 | from rest_framework import serializers
31 |
32 | class BatchSerializer(serializers.ModelSerializer):
33 | image_set = serializers.PrimaryKeyRelatedField(many=True,
34 | queryset=Image.objects.all())
35 |
36 | class Meta:
37 | model = Batch
38 | fields = ('uid','status','add_date','modify_date','id','image_set',)
39 |
40 |
41 | class ImageSerializer(serializers.ModelSerializer):
42 | class Meta:
43 | model = Image
44 | fields = ('uid','status','add_date','modify_date','id','batch',)
45 |
--------------------------------------------------------------------------------
/sendit/apps/api/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2017 Vanessa Sochat
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | SOFTWARE.
21 |
22 | '''
23 |
24 | from sendit.apps.main.utils import ls_fullpath
25 | import os
26 |
27 | def get_size(batch):
28 |     '''get the size of a batch, in GB
29 |     '''
30 |     # Recalculate unless the batch is DONE and already has a nonzero
31 |     # size; this also covers a DONE batch missing "SizeBytes" entirely
32 |     do_calculation = True
33 |     if batch.status == "DONE":
34 |         if batch.qa.get('SizeBytes', 0) != 0:
35 |             do_calculation = False
36 |
37 |     if do_calculation is True:
38 | batch_folder = "/data/%s" %(batch.uid)
39 | dicom_files = ls_fullpath(batch_folder)
40 | batch.qa['SizeBytes'] = sum(os.path.getsize(f) for f in dicom_files)
41 | batch.save()
42 | return batch.qa['SizeBytes']/(1024*1024*1024.0) # bytes to GB
43 |
--------------------------------------------------------------------------------
/sendit/apps/main/management/commands/show_times.py:
--------------------------------------------------------------------------------
1 | from sendit.logger import bot
2 | from sendit.apps.main.models import Batch
3 | from django.core.management.base import (
4 | BaseCommand
5 | )
6 |
8 | from sendit.apps.main.tasks import import_dicomdir
9 | from sendit.apps.main.utils import ls_fullpath
10 |
11 | import sys
12 | import os
13 |
14 |
15 | def get_size(batch):
16 |     '''get the size of a batch, in MB'''
17 |     # Recalculate unless the batch is DONE and already has a nonzero
18 |     # size; this also covers a DONE batch missing "SizeBytes" entirely
19 |     do_calculation = True
20 |     if batch.status == "DONE":
21 |         if batch.qa.get('SizeBytes', 0) != 0:
22 |             do_calculation = False
23 |     if do_calculation is True:
24 | batch_folder = "/data/%s" %(batch.uid)
25 | dicom_files = ls_fullpath(batch_folder)
26 | batch.qa['SizeBytes'] = sum(os.path.getsize(f) for f in dicom_files)
27 | batch.save()
28 | return batch.qa['SizeBytes']/(1024*1024.0) # bytes to MB
29 |
30 |
31 |
32 | class Command(BaseCommand):
33 | help = '''get a quick overview of stats for running times'''
34 |
35 | def handle(self,*args, **options):
36 |
37 | new_batches = 0
38 | for batch in Batch.objects.all():
39 | if batch.status == "ERROR":
40 | continue
41 | elif batch.status == "EMPTY":
42 | continue
43 | elif batch.status == "DONE":
44 | size = get_size(batch) # mb
45 | time = batch.qa['FinishTime'] - batch.qa['StartTime']
46 | bot.info("Batch %s: %s MB in %s minutes" %(batch.uid,
47 | size,
48 | time/60))
49 | else:
50 | new_batches+=1
51 |
52 | bot.info("%s new batches still processing." %(new_batches))
53 |
--------------------------------------------------------------------------------
/docs/manager.md:
--------------------------------------------------------------------------------
1 | # Django's Management
2 | Django is primarily controlled via `manage.py`, the file sitting in the base of the repo. You will see its use in several scripts such as [run_uwsgi.sh](../run_uwsgi.sh) to do things like `makemigrations` and `migrate`. These commands in particular are used to update the database (given any changes in the `models.py` files that define the tables). Generally, you can run commands to control user generation, database updates and dumps, and even your own custom commands. The commands that I use most often are `shell` and (sometimes) `dbshell` to immediately get an interactive shell for the python application (shell) or the postgres database (dbshell). With `--help` we can see everything that `manage.py` can do:
3 |
4 | ```bash
5 | [auth]
6 | changepassword
7 | createsuperuser
8 |
9 | [contenttypes]
10 | remove_stale_contenttypes
11 |
12 | [django]
13 | check
14 | compilemessages
15 | createcachetable
16 | dbshell
17 | diffsettings
18 | dumpdata
19 | flush
20 | inspectdb
21 | loaddata
22 | makemessages
23 | makemigrations
24 | migrate
25 | opbeat
26 | sendtestemail
27 | shell
28 | showmigrations
29 | sqlflush
30 | sqlmigrate
31 | sqlsequencereset
32 | squashmigrations
33 | startapp
34 | startproject
35 | test
36 | testserver
37 |
38 | [djcelery]
39 | celery
40 | celerybeat
41 | celerycam
42 | celeryd
43 | celeryd_detach
44 | celeryd_multi
45 | celerymon
46 | djcelerymon
47 |
48 | [guardian]
49 | clean_orphan_obj_perms
50 |
51 | [sessions]
52 | clearsessions
53 |
54 | [sitemaps]
55 | ping_google
56 |
57 | [staticfiles]
58 | collectstatic
59 | findstatic
60 | runserver
61 |
62 | [watcher]
63 | start_watcher
64 | stop_watcher
65 | ```
66 |
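To run any of these inside the running container, one option (assuming the `uwsgi` service name from the docker-compose.yml shown in the setup docs) is:

```bash
docker-compose exec uwsgi python manage.py shell
```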
67 | For example, the last set of commands for the `watcher` were defined by adding a `management/commands` folder to our watcher application.
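As a sketch (not the actual implementation, and with an illustrative path and message), a minimal custom command module looks something like this:

```python
# sendit/apps/watcher/management/commands/start_watcher.py (illustrative)
from django.core.management.base import BaseCommand

class Command(BaseCommand):
    help = 'start the watcher daemon'

    def handle(self, *args, **options):
        # the real command would start the pyinotify-based watcher here
        self.stdout.write("Starting watcher...")
```

Django discovers any module placed in a `management/commands` folder of an installed app and exposes it as `python manage.py <module name>`.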
68 |
--------------------------------------------------------------------------------
/scripts/prepare_instance.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Change this to where you want to install.
4 | INSTALL_ROOT=/opt
5 |
6 | # Prepare instance (or machine) with Docker, docker-compose, python
7 |
8 | sudo apt-get update > /dev/null
9 | sudo apt-get install -y git \
10 | build-essential \
11 | nginx \
12 | python-dev
13 |
14 | # Needed module for system python
15 | wget https://bootstrap.pypa.io/get-pip.py
16 | sudo /usr/bin/python get-pip.py
17 | sudo pip install ipaddress
18 | sudo pip install oauth2client
19 |
20 | # Python 3
21 | wget https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh
22 | bash Anaconda3-4.2.0-Linux-x86_64.sh -b
23 |
24 | # You might already have anaconda installed somewhere
25 | PATH=$HOME/anaconda3/bin:$PATH
26 | rm Anaconda3-4.2.0-Linux-x86_64.sh
27 | export PATH
28 |
29 | # Add docker key server
30 | sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D
31 |
32 | # Install Docker!
33 | sudo apt-get update &&
34 | sudo apt-get install apt-transport-https ca-certificates &&
35 | sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D
36 | echo "deb https://apt.dockerproject.org/repo ubuntu-xenial main" | sudo tee --append /etc/apt/sources.list.d/docker.list
37 | sudo apt-get update &&
38 | apt-cache policy docker-engine
39 | sudo apt-get update &&
40 | sudo apt-get -y install linux-image-extra-$(uname -r) linux-image-extra-virtual &&
41 | sudo apt-get -y install docker-engine &&
42 | sudo service docker start
43 |
44 | #sudo docker run hello-world
45 | #make sure to add all users that will maintain / use the registry
46 | sudo usermod -aG docker $USER
47 |
48 | # Docker-compose
49 | sudo apt -y install docker-compose
50 |
51 | # Note that you will need to log in and out for changes to take effect
52 |
53 | if [ ! -d $INSTALL_ROOT/sendit ]
54 | then
55 | cd $INSTALL_ROOT
56 | git clone https://www.github.com/pydicom/sendit.git
57 | fi
58 |
--------------------------------------------------------------------------------
/sendit/apps/api/urls.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2017 Vanessa Sochat
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | SOFTWARE.
21 |
22 | '''
23 |
24 | from django.views.generic.base import TemplateView
25 | from django.conf.urls import url, include
26 |
27 | from rest_framework import routers
28 | from rest_framework.authtoken import views as rest_views
29 | from rest_framework_swagger.views import get_swagger_view
30 |
31 | import sendit.apps.api.views as api_views
32 | from sendit.settings import API_VERSION
33 |
34 | swagger_view = get_swagger_view(title='sendit API', url='')
35 | router = routers.DefaultRouter()
36 | router.register(r'^images', api_views.ImageViewSet)
37 | router.register(r'^batches', api_views.BatchViewSet)
38 |
39 |
40 | urlpatterns = [
41 |
42 | url(r'^$', swagger_view),
43 | url(r'^metrics$', api_views.metrics_view, name='metrics_view'),
44 |     url(r'^metrics/gb/(?P<days>\d+)/$', api_views.gb_day, name="gb_day"),
45 | url(r'^metrics/gb$', api_views.gb_day, name="gb_day"),
46 | url(r'^docs$', api_views.api_view, name="api"),
47 | ]
48 |
--------------------------------------------------------------------------------
/sendit/settings/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | #####################################################
4 | # RESTFUL API
5 | #####################################################
6 |
7 | # Anonymize
8 | # If True, we will have the images first go to a task to retrieve fields to anonymize
9 | ANONYMIZE_RESTFUL=True
10 |
11 | # These credentials are required for the DASHER endpoint
12 | STANFORD_APPLICATION_CREDENTIALS='/var/www/images/.stanford'
13 | os.environ['STANFORD_CLIENT_SECRETS'] = STANFORD_APPLICATION_CREDENTIALS
14 |
15 | # If True, scrub pixel data for images identified by header "Burned in Annotation" = "NO"
16 | ANONYMIZE_PIXELS=False # currently not supported
17 |
18 | # An additional specification for whitelisting, blacklisting, and greylisting data
19 | # If None, only the default (for burned pixel filtering) is used
20 | # Currently, these live with the deid software, eg:
21 | # https://github.com/pydicom/deid/blob/development/deid/data/deid.dicom.xray.chest
22 | # would be referenced with STUDY_DEID="dicom.xray.chest"
23 | STUDY_DEID=None
24 |
25 | # Entity and item ids: PatientID and SOPInstanceUID are the defaults for deid,
26 | # but we can change them here
27 | ENTITY_ID="PatientID"
28 | ITEM_ID="AccessionNumber"
29 |
30 | #####################################################
31 | # WORKER
32 | #####################################################
33 |
34 | # Optionally, parse a subfolder under /data, or set to None
35 | DATA_BASE = "/data"
36 | DATA_SUBFOLDER=None # ignored if DATA_INPUT_FOLDERS is set
37 | DATA_INPUT_FOLDERS=None
38 |
39 | #####################################################
40 | # STORAGE
41 | #####################################################
42 |
43 | # Google Storage
44 | # Should we send to Google at all?
45 | SEND_TO_GOOGLE=True
46 |
47 | # These credentials are required for Google
48 | GOOGLE_APPLICATION_CREDENTIALS='/code/.google'
49 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_APPLICATION_CREDENTIALS
50 |
51 | # Google Cloud Storage Bucket (must be created)
52 | GOOGLE_CLOUD_STORAGE='radiology'
53 | GOOGLE_STORAGE_COLLECTION='' # must be defined before SOM_STUDY
54 | GOOGLE_PROJECT_NAME=None
55 |
--------------------------------------------------------------------------------
/sendit/apps/main/views/images.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | '''
24 |
25 | from sendit.apps.main.models import (
26 | Batch,
27 | Image
28 | )
29 |
30 | from sendit.apps.main.utils import get_image
31 | from django.contrib.auth.decorators import login_required
32 | from django.contrib.auth.models import User
33 | from django.contrib import messages
34 |
35 | from django.http import (
36 | HttpResponse,
37 | JsonResponse
38 | )
39 |
40 | from django.http.response import (
41 | HttpResponseRedirect,
42 | HttpResponseForbidden,
43 | Http404
44 | )
45 |
46 | from django.shortcuts import (
47 | get_object_or_404,
48 | render_to_response,
49 | render,
50 | redirect
51 | )
52 |
53 | import os
54 |
55 |
56 | def image_details(request,iid):
57 | '''view details for an image
58 | '''
59 | image = get_image(iid)
60 | context = {"active":"dashboard",
61 | "image" : image,
62 | "title": image.uid }
63 |
64 | return render(request, 'images/image_details.html', context)
65 |
66 |
--------------------------------------------------------------------------------
/https/nginx.conf:
--------------------------------------------------------------------------------
1 | server {
2 | listen *:80;
3 | server_name localhost;
4 |
5 |     client_max_body_size 1024M; # allows file uploads up to 1024 megabytes
6 | client_body_buffer_size 1024M; # this is for in memory uploading of images
7 |
8 |     add_header X-Clacks-Overhead "GNU Terry Pratchett";
10 | add_header Access-Control-Allow-Origin *;
11 | add_header 'Access-Control-Allow-Credentials' 'true';
12 | add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS';
13 | add_header 'Access-Control-Allow-Headers' 'Authorization,DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type';
14 |
15 | location /.well-known/acme-challenge/ {
16 | alias /var/www/.well-known/acme-challenge/;
17 | }
18 |
19 | location /images {
20 | alias /var/www/images;
21 | }
22 |
23 | location / {
24 | include /etc/nginx/uwsgi_params.par;
25 | uwsgi_pass uwsgi:3031;
26 | }
27 |
28 | location /static {
29 | alias /var/www/static;
30 | }
31 |
32 | }
33 |
34 | server {
35 |
36 | listen 443;
37 | server_name localhost;
38 |
39 | root html;
40 | client_max_body_size 1024M;
41 |
42 | ssl on;
43 | ssl_certificate /etc/ssl/certs/chained.pem;
44 | ssl_certificate_key /etc/ssl/private/domain.key;
45 | ssl_session_timeout 5m;
46 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
47 | ssl_ciphers ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA:ECDHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA;
48 | ssl_session_cache shared:SSL:50m;
49 | ssl_dhparam /etc/ssl/certs/dhparam.pem;
50 | ssl_prefer_server_ciphers on;
51 |
52 | location /images {
53 | alias /var/www/images;
54 | }
55 |
56 | location / {
57 | include /etc/nginx/uwsgi_params.par;
58 | uwsgi_pass uwsgi:3031;
59 | }
60 |
61 | location /static {
62 | alias /var/www/static;
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/sendit/apps/main/templates/images/image_details.html:
--------------------------------------------------------------------------------
1 | {% extends "base/base.html" %}
2 | {% load crispy_forms_tags %}
3 | {% load dictionary_extras %}
4 | {% load staticfiles %}
5 | {% load humanize %}
6 | {% block head %}
7 |
8 |
9 |
10 |
11 |
12 |
17 |
18 |
42 | {% endblock %}
43 |
44 | {% block content %}
45 | {% include 'messages/messages.html' %}
46 |
47 |
48 |
49 |
50 |
51 |
52 |
{{ image.uid }}
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
66 |
67 |
68 |
69 |
70 |
71 | {% endblock %}
72 |
--------------------------------------------------------------------------------
/sendit/apps/main/views/batch.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | '''
24 |
25 | from sendit.apps.main.models import (
26 | Batch,
27 | Image
28 | )
29 |
30 | from sendit.apps.main.utils import get_batch
31 | from django.contrib.auth.decorators import login_required
32 | from django.contrib.auth.models import User
33 | from django.contrib import messages
34 |
35 | from django.http import (
36 | HttpResponse,
37 | JsonResponse
38 | )
39 |
40 | from django.http.response import (
41 | HttpResponseRedirect,
42 | HttpResponseForbidden,
43 | Http404
44 | )
45 |
46 | from django.shortcuts import (
47 | get_object_or_404,
48 | render_to_response,
49 | render,
50 | redirect
51 | )
52 |
53 | import os
54 |
55 | def get_batch_context(bid):
56 | '''a repeated sequence of calls to get the context
57 | for a batch based on id'''
58 | batch = get_batch(bid)
59 | context = {"active":"dashboard",
60 | "batch" : batch,
61 | "title": batch.uid }
62 | return context
63 |
64 | def batch_details(request,bid):
65 | '''view details for a batch
66 | '''
67 | context = get_batch_context(bid)
68 | return render(request, 'batch/batch_details.html', context)
69 |
70 |
--------------------------------------------------------------------------------
/sendit/urls.py:
--------------------------------------------------------------------------------
1 | '''
2 | sendit url configuration
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | The `urlpatterns` list routes URLs to views. For more information please see:
23 | https://docs.djangoproject.com/en/1.9/topics/http/urls/
24 |
25 | '''
26 |
27 |
28 | from django.conf.urls import include, url
29 | from sendit.apps.base import urls as base_urls
30 | from sendit.apps.main import urls as main_urls
31 | from sendit.apps.api import urls as api_urls
32 | from django.contrib import admin
33 |
34 | # Configure custom error pages
35 | from django.conf.urls import ( handler404, handler500 )
36 | handler404 = 'sendit.apps.base.views.handler404'
37 | handler500 = 'sendit.apps.base.views.handler500'
38 |
39 | # Sitemaps
40 | #from sendit.apps.api.sitemap import ReportCollectionSitemap, ReportSitemap
41 | #sitemaps = {"reports":ReportSitemap,
42 | # "collections":ReportCollectionSitemap}
43 |
44 | urlpatterns = [
45 | url(r'^admin/', admin.site.urls),
46 | url(r'^', include(base_urls)),
47 | url(r'^api/', include(api_urls)),
48 | url(r'^', include(main_urls)),
49 | # url(r'^sitemap\.xml$', index, {'sitemaps': sitemaps}, name="sitemap"),
50 |     # url(r'^sitemap-(?P<section>.+)\.xml$', sitemap, {'sitemaps': sitemaps},
51 | # name='django.contrib.sitemaps.views.sitemap'),
52 | ]
53 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # SendIt Documentation
2 |
3 | ## Overview
4 | The Sendit application is an on-demand application that works in two stages to anonymize images and then push the anonymized images and metadata to Google Cloud Storage and Google Cloud BigQuery, respectively. It works as follows:
5 |
6 | - the researcher starts the anonymization pipeline with an input of one or more folders
7 | - each folder is added as a "Batch" with status "QUEUE" to indicate it is ready for import
8 | - anonymization is performed (status "PROCESSING"), meaning fields in the header and image data are removed or replaced
9 | - when status "DONEPROCESSING" is achieved for all batches in the queue, the researcher triggers the final job to send data to storage (status "SENT")
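Put together, the status flow for a single batch looks like this:

```
QUEUE --> PROCESSING --> DONEPROCESSING --> SENT
```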
10 |
11 | ## Preparation
12 | The base of the image is distributed via [sendit-base](https://github.com/pydicom/sendit/tree/master/scripts/docker). This image has all dependencies pre-installed so we can easily bring the application up and down.
13 |
14 | - [Application](application.md): If you are a new developer, please read about the application flow and infrastructure first. Sendit is a skeleton that uses other python modules to handle interaction with Stanford and Google APIs, along with anonymization of datasets.
15 |
16 | ## Deployment
17 |
18 | - [Setup](setup.md): Basic setup (download and install) of a new application for a server.
19 | - [Configuration](config.md): How to configure the application before starting it up.
20 | - [Start](start.md): Start it up!
21 | - [Interface](interface.md): A simple web interface for monitoring batches.
22 |
23 | ## Module-specific Documentation
24 |
25 | - [Management](manager.md): an overview of controlling the application with [manage.py](../manage.py)
26 | - [Logging](logging.md): overview of the logger provided in the application
27 | - [Watcher](watcher.md): configuration and use of the watcher daemon to detect new DICOM datasets
28 |
29 |
30 | ## Steps in Pipeline
31 | 1. [Dicom Import](dicom_import.md): The logic for when a session directory is detected as finished by the Watcher.
32 | 2. [Anonymize](anonymize.md): the defaults (and configuration) for the anonymization step of the pipeline. This currently includes just header fields, and we expect to add pixel anonymization.
33 | 3. [Storage](storage.md): Is the final step to move the anonymized dicom files to OrthanCP and/or Google Cloud Storage.
34 | 4. [Error Handling](errors.md): an overview of how the application manages server, API, and other potential issues.
35 |
--------------------------------------------------------------------------------
/docs/dicom_import.md:
--------------------------------------------------------------------------------
1 | # Pre Dicom Import
2 | There is a process running on the server that uses the `dcm4che` command line tools to issue a `C-MOVE` command to download datasets to the application's `/data` folder. The script that runs might look something like this:
3 |
4 | ```bash
5 | #!/bin/bash
6 |
7 | CALLINGAE=calling-ae-title
8 | PORT=111.11.111.11
9 | TARGETAE=ONION@22.222.22.22:4444
10 | NUM=L123456
11 | BASE=/opt/sendit/data
12 |
13 | mkdir $BASE/$NUM.tmp
14 | dcmqr -L$CALLINGAE@$PORT $TARGETAE -qAccessionNumber=$NUM -cmove $CALLINGAE -cstoredest=$BASE/$NUM.tmp
15 | mv $BASE/$NUM.tmp $BASE/$NUM
16 | ```
17 |
18 | In the above, we see that `dcmqr` is used to issue a `C-MOVE` that dumps a set of dicoms into a folder named by a number, which is likely an accession number, as that is a common query. The last line of the script renames the `*.tmp` folder by removing the extension, which then notifies the watcher that the folder is done.
19 |
20 | # Dicom Import
21 | When the [watcher](watcher.md) detects a `FINISHED` session directory in the folder being watched (`/data` in the container, mapping to `data` in the application base folder on the host), the process of importing the images into the database is started. This means the following steps:
22 |
23 | ## 1. Adding Models to Database
24 | Each dicom file is read, and during reading, added as an `Image` object to the database. The study and session are also extracted from the header, and these are added as `Study` and `Session` objects, respectively. The Series and Study Ids are extracted from these fields in the dicom header, for each file separately:
25 |
26 | ```
27 | StudyID
28 | SeriesInstanceUID
29 | ```
30 |
31 | If we run into some case where the fields are not defined, I have put a check that will use the folder name instead, prefixed with `series_` or `study_`. For example, for a folder `ST-1234` with a dicom missing header information, the session would be `series_ST-1234` and the study `study_ST-1234`, as shown in the sketch below.
32 |
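As a minimal sketch of that fallback (not the application's exact code, and assuming pydicom's `read_file`):

```python
# Fall back to the folder name when the header fields are missing
import os
from pydicom import read_file

def get_study_session(dicom_file):
    dicom = read_file(dicom_file)
    folder = os.path.basename(os.path.dirname(dicom_file))
    session = getattr(dicom, 'SeriesInstanceUID', '') or "series_%s" % folder
    study = getattr(dicom, 'StudyID', '') or "study_%s" % folder
    return study, session
```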
33 |
34 | ## 2. Saving Dicoms
35 | All files in the folder are assumed to be dicom, since the extensions may vary. If an attempt to read a file as dicom fails, a warning is issued and the file is skipped, but the process continues. The file is not removed, in case inspection is warranted later (is this how we want it? should there be some notification?)
36 |
37 | The dicom file itself, when saved to the model, is saved with the application's media at `/images`.
38 |
39 | ## 3. Finishing Batch
40 | All the images found in a folder are considered to be a "batch," and when all files for a batch have been added, the function fires off the list to be anonymized. If there were no files in the batch, the function is not fired.
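A rough sketch of that guard (statuses are from this documentation; the function and handoff are illustrative, not sendit's exact API):

```python
from sendit.apps.main.models import Batch

def finish_batch(batch):
    '''illustrative only: mark a batch ready for anonymization, or empty'''
    if batch.image_set.count() > 0:
        batch.status = "PROCESSING"
        batch.save()
        # hand the batch off to the celery anonymization task here
    else:
        batch.status = "EMPTY"
        batch.save()
```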
41 |
--------------------------------------------------------------------------------
/sendit/apps/main/templates/main/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base/base.html" %}
2 | {% block content %}
3 |
52 |
53 | {% endblock %}
54 |
--------------------------------------------------------------------------------
/scripts/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.4
2 | ENV PYTHONUNBUFFERED 1
3 | RUN apt-get update && apt-get install -y cmake \
4 | libpng12-dev libtiff5-dev libxml2-dev libjpeg62-turbo-dev \
5 | zlib1g-dev libwrap0-dev libssl-dev \
6 | libopenblas-dev \
7 | gfortran \
8 | python3-numpy \
9 | pkg-config \
10 | libxml2-dev \
11 | libxmlsec1-dev \
12 | libhdf5-dev \
13 | libgeos-dev \
14 | build-essential \
15 | openssl \
16 | nginx \
17 | wget \
18 | vim
19 |
20 | RUN apt-get install -y --only-upgrade python-setuptools
21 | RUN pip install --upgrade setuptools
22 | RUN pip install --upgrade pip
23 | RUN pip install uwsgi
24 | RUN pip install Django==1.11.2
25 | RUN pip install social-auth-app-django
26 | RUN pip install social-auth-core[saml]
27 | RUN pip install djangorestframework
28 | RUN pip install django-rest-swagger
29 | RUN pip install django-filter
30 | RUN pip install django-taggit
31 | RUN pip install django-form-utils
32 | RUN pip install django-crispy-forms
33 | RUN pip install django-taggit-templatetags
34 | RUN pip install django-dirtyfields
35 | RUN pip install psycopg2
36 | RUN pip install shapely
37 | RUN pip install Pillow
38 | RUN pip install requests
39 | RUN pip install requests-oauthlib
40 | RUN pip install python-openid
41 | RUN pip install django-sendfile
42 | RUN pip install django-polymorphic
43 | RUN pip install celery[redis]==3.1.25
44 | RUN pip install django-celery
45 | RUN pip install django-cleanup
46 | RUN pip install django-chosen
47 | RUN pip install opbeat
48 | RUN pip install 'django-hstore==1.3.5'
49 | RUN pip install django-datatables-view
50 | RUN pip install django-oauth-toolkit
51 | RUN pip install simplejson
52 | RUN pip install django-gravatar2
53 | RUN pip install pygments
54 | RUN pip install django-lockdown
55 | RUN pip install xmltodict
56 | #RUN pip install grpcio
57 | RUN pip install django-user-agents
58 | RUN pip install django-guardian
59 | RUN pip install pyinotify
60 | RUN pip install matplotlib
61 |
62 | # Install pydicom
63 | WORKDIR /tmp
64 | RUN git clone https://github.com/pydicom/pydicom
65 | WORKDIR pydicom
66 | RUN git checkout affb1cf10c6be2aca311c29ddddc622f8bd1f810
67 | RUN python setup.py install
68 |
69 | # deid
70 | WORKDIR /tmp
71 | RUN git clone -b development https://github.com/pydicom/deid
72 | WORKDIR /tmp/deid
73 | RUN python setup.py install
74 |
75 | # som
76 | WORKDIR /tmp
77 | RUN git clone https://github.com/vsoch/som
78 | WORKDIR /tmp/som
79 | RUN python setup.py install
80 |
81 | RUN pip install google-cloud-storage
82 | RUN pip install google-cloud-datastore
83 | RUN pip install google-cloud-bigquery
84 | RUN pip install google-api-python-client
85 | RUN pip install google-cloud
86 |
87 | RUN mkdir /code
88 | RUN mkdir -p /var/www/images
89 | RUN mkdir /data
90 | WORKDIR /code
91 | RUN apt-get remove -y gfortran
92 |
93 | # Crontab
94 | RUN apt-get update && apt-get install -y gnome-schedule
95 |
96 | RUN apt-get autoremove -y
97 | RUN apt-get clean
98 |
99 | ENV MESSAGELEVEL -1
100 |
101 | WORKDIR /code
102 |
103 | EXPOSE 3031
104 |
--------------------------------------------------------------------------------
/sendit/apps/base/templates/base/head.html:
--------------------------------------------------------------------------------
1 | {% load static %}
2 |
3 | {% block title %}SendIt: Stanford SOM{% endblock %}
4 | {% block includes %}{% endblock %}
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | {% if active == "dashboard"%}
38 |
39 | {% endif %}
40 |
41 | {% if active == "settings"%}
42 |
43 | {% endif %}
44 |
45 | {% block css %}{% endblock %}
46 | {% block head %}{% endblock %}
47 | {% block functions %}{% endblock %}
48 |
49 |
--------------------------------------------------------------------------------
/sendit/apps/main/views/main.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | '''
24 |
25 | from sendit.apps.main.models import (
26 | Batch,
27 | Image
28 | )
29 |
30 | from sendit.apps.main.utils import get_batch
31 | from django.contrib.auth.decorators import login_required
32 | from django.contrib.auth.models import User
33 | from django.contrib import messages
34 |
35 | from django.http import (
36 | HttpResponse,
37 | JsonResponse
38 | )
39 |
40 | from django.http.response import (
41 | HttpResponseRedirect,
42 | HttpResponseForbidden,
43 | Http404
44 | )
45 |
46 | from django.shortcuts import (
47 | get_object_or_404,
48 | render_to_response,
49 | render,
50 | redirect
51 | )
52 |
53 | from sendit.apps.watcher.utils import (
54 | is_watching
55 | )
56 |
57 | from sendit.settings import (
58 | BASE_DIR,
59 | MEDIA_ROOT
60 | )
61 | import os
62 | media_dir = os.path.join(BASE_DIR,MEDIA_ROOT)
63 |
64 |
65 | def index_view(request):
66 | '''index view is associated with the dashboard (home) view of the
67 | application. It shows the user a table of current batches, and an
68 | overall summary.
69 | '''
70 | batches = Batch.objects.all()
71 | context = {"active":"dashboard",
72 | "batches":batches,
73 | "title":"Dashboard"}
74 | context['is_watching'] = is_watching()
75 |
76 | return render(request, 'main/index.html', context)
77 |
78 |
79 |
80 | def settings_view(request):
81 | '''settings view is the portal to control turning the watcher on
82 | and off, as well as seeing general logs.
83 | '''
84 | context = {"active":"settings",
85 | "title": "Settings"}
86 |
87 | # Is the watcher running?
88 | context['is_watching'] = is_watching()
89 |
90 | return render(request, 'main/settings.html', context)
91 |
92 |
93 | def batch_details(request,bid):
94 | '''view details for a batch
95 | '''
96 | batch = get_batch(bid)
97 | context = {"active":"dashboard",
98 | "batch" : batch,
99 | "title": batch.uid }
100 |
101 | # Is the watcher running?
102 | context['is_watching'] = is_watching()
103 |
104 | return render(request, 'batch/batch_details.html', context)
105 |
106 |
--------------------------------------------------------------------------------
/docs/interface.md:
--------------------------------------------------------------------------------
1 | # Interface
2 | This application is not intended to be a web application open to the world for perusing. However, it does have a simple interface to monitor batches and check the status of the watcher. This interface can be modified as needed, depending on the needs of the user (the developer or administrator of the server). Thus, we suggest taking the following steps:
3 |
4 | - **Limit IP addresses**: Limit the IP addresses that can access the application to those in the range of your group.
5 | - **Passwords**: Make sure to set a lockdown password, so that if someone within your group who should not have access reaches the web interface, it is still protected.
6 |
7 | There isn't any sort of button provided in the interface to take action, so these precautions are mostly to (generally) keep things private.
8 |
9 | # Dashboard
10 | For the dashboard and settings views, you will notice the interface "jump" every 10 seconds. This is because it automatically refreshes itself, to show updates if there are any.
11 |
12 |
13 | ## Watcher Status
14 | The watcher status is determined by the presence or absence of the pid file, and the interface will show you whether it is active:
15 |
16 | 
17 |
18 | or inactive:
19 |
20 | 
21 |
22 |
23 | ## Batches
24 | The dashboard also shows basic info about the batches. In the above pictures, there are none. In the picture below, there is one new batch.
25 |
26 | 
27 |
28 | If a batch has errors, there will be a link to inspect the specific error message (the error tracking is implemented, but not yet the view).
29 |
30 | Next, learn about usage by reading about the [manager](manager.md).
31 |
32 |
33 | ## Batch Details
34 | You can click on a batch name to see details for the batch. Importantly, if the batch has any errors during processing, a separate table (not shown in the image below) will appear for you to inspect.
35 |
36 | 
37 |
38 |
39 | ## Image Details
40 | For quick visual inspection of an image, you can click on "View" under the batch view to access the image details view:
41 |
42 |
43 | 
44 |
45 |
46 | # API
47 | It might be desired at some point for the application to talk to other services, so it has a simple API that serves batches and images, each as resources.
48 |
49 | 
50 |
51 |
52 | ## Resources
53 | As is typical for a REST API, the following endpoints return a paginated result of all resources, with links to `next` and `previous`, along with a `count` and a list of `results` objects, each corresponding to one model. The api endpoints might look like this:
54 |
55 | ```
56 | /api/images
57 | /api/batches
58 | ```
59 |
60 | and visually, it looks like this:
61 |
62 | 
63 |
64 | This means that, to query a specific page you would do:
65 |
66 | ```
67 | /api/images?page=2
68 | ```
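For example, a quick way to walk the results from python (a sketch; the host and any lockdown authentication depend on your deployment):

```python
import requests

# fetch the first page of batches; uid and status come from the serializers
response = requests.get("http://127.0.0.1/api/batches")
data = response.json()
print("%s batches total" % data['count'])
for batch in data['results']:
    print(batch['uid'], batch['status'])
```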
69 |
70 | and again, this entire interface is behind the lockdown password, and can have additional authentication or changes if needed.
71 |
72 |
73 | ## Swagger
74 | If the user (the administrator of the endpoint) wants to interact or query dynamically, a swagger endpoint is provided:
75 |
76 | 
77 |
78 | And it generally explains the resources.
79 |
80 |
81 | 
82 |
--------------------------------------------------------------------------------
/scripts/save_google_sheets.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python
2 |
3 | # Command line script to get GB/day from manager, then save to google sheet.
4 | from som.api.google.sheets import Client
5 | from datetime import datetime, timedelta
6 | import subprocess
7 | import argparse
8 | import json
9 | import os
10 | import sys
11 |
12 |
13 | def get_parser():
14 | parser = argparse.ArgumentParser(
15 | description="Sendit: save GB-day to Google Sheets")
16 |
17 | parser.add_argument("--sheet_id", dest='sheet_id',
18 | help="alpha-numerical string that is id for sheet",
19 | type=str, required=True)
20 |
21 | parser.add_argument("--days", dest='days',
22 | help="number of days to ask for metric (default is 1)",
23 | type=int, default=1)
24 |
25 | # Compare two images (a similarity tree)
26 | parser.add_argument('--save', dest='save',
27 | help="required flag to save new row (otherwise prints sheet)",
28 | default=False, action='store_true')
29 |
30 | return parser
31 |
32 |
33 | def main():
34 |
35 | parser = get_parser()
36 |
37 | try:
38 | args = parser.parse_args()
39 | except:
40 | sys.exit(0)
41 |
42 | command = ["python", "manage.py", "summary_metrics", "--days", str(args.days)]
43 | process = subprocess.Popen(command, stdout=subprocess.PIPE)
44 | result,error = process.communicate()
45 |
46 | if isinstance(result,bytes):
47 | result = result.decode('utf-8')
48 |
49 | result = json.loads(result)
50 |
51 | gb_day = result["gb_per_day"]
52 |
53 | secrets = os.environ.get('GOOGLE_SHEETS_CREDENTIALS')
54 | if secrets is None:
55 | print("Please export client secrets file name at GOOGLE_SHEETS_CREDENTIALS")
56 | sys.exit(1)
57 |
58 | cli = Client()
59 |
60 | # Define date range for metric
61 | start_date = (datetime.now() - timedelta(days=args.days)).strftime("%m/%d/%Y")
62 | end_date = datetime.now().strftime("%m/%d/%Y")
63 |
64 | # Get previous values
65 | values = cli.read_spreadsheet(sheet_id=args.sheet_id, range_name="A:E")
66 |
67 | # Only update if we are sure about values
68 | required = ['pipeline',
69 | 'start_date',
70 | 'end_date',
71 | 'G/day GetIt',
72 | 'G/day SendIt']
73 |
74 | for h in range(len(required)):
75 | if required[h] != values[0][h]:
76 | print("Warning, sheet is possibly changed.")
77 | print("Required: %s" %",".join(required))
78 | print("Found: %s" %",".join(values[0]))
79 | sys.exit(0)
80 |
81 | # Create row, append
82 | # pipeline start_date end_date G/day GetIt G/day SendIt
83 | # Define new row, add
84 |
85 | row = [1, # pipeline
86 | start_date, # start_date
87 | end_date, # end_date
88 | None, # G/day GetIt
89 | gb_day] # G/day SendIt
90 |
91 | values.append(row)
92 |
93 | for row in values:
94 | print(' '.join([str(x) for x in row]))
95 |
96 | # Update sheet
97 | if args.save is True:
98 | print("Saving result to sheet %s" %args.sheet_id)
99 | result = cli.write_spreadsheet(args.sheet_id, values, range_name="A:E")
100 |
101 |
102 | if __name__ == '__main__':
103 | main()
104 |
--------------------------------------------------------------------------------
/docs/storage.md:
--------------------------------------------------------------------------------
1 | # Storage
2 | When we get here, we have anonymized our data, and the user can optionally choose to send it off to cloud storage. As a reminder, this is determined in the settings, under [settings/config.py](../sendit/settings/config.py):
3 |
4 | ```
5 | # Should we send to Google at all?
6 | SEND_TO_GOOGLE=False
7 |
8 | # Google Cloud Storage
9 | GOOGLE_CLOUD_STORAGE='som-pacs'
10 | ```
11 |
12 | Importantly, for the above, there must be a `GOOGLE_APPLICATION_CREDENTIALS` filepath, a `GOOGLE_PROJECT_NAME`, and a `GOOGLE_STORAGE_COLLECTION` variable exported in the environment, or the application should be run on a Google Cloud Instance (unlikely).
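For example, before starting the application you might export (the paths and names below are placeholders):

```
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json
export GOOGLE_PROJECT_NAME=my-project
export GOOGLE_STORAGE_COLLECTION=IRB41449
```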
13 |
14 | ## Upload Process
15 | By the time we get here, we've anonymized the images, and prepared an equivalent corresponding lookup (with old image identifier) that also has the same anonymized metadata. The difference is that the lookup has additional information from nested sequences that are easy to extract. We now proceed to use the som tools google storage API client to upload a set of images associated with an entity and metadata to Google Storage (images) and Google BigQuery (metadata).
16 |
17 | Specifically, we retrieve images for the batch that weren't flagged for having possible PHI in the pixels, meaning they are in the entity's PHI folder. We instantiate a client based on the storage bucket and project name (the `GOOGLE_APPLICATION_CREDENTIALS` are essential for this to work; if you get permissions errors, you have an issue either with finding this file, or with the file's IAM permissions in Google Cloud not granting Read/Write/Admin access to the resource), and the client sends images to Google Storage, and metadata to BigQuery.
18 |
19 | ## Storage
20 | The images are first uploaded to Storage, and complete metadata about their location, etc., is returned. In object storage, a "path" is really a key for the object. We use an organizational schema that groups things on the level of Collection (IRB), Entity (Patient), and Image Set (Study). For example, for the google bucket "radiology" we might see:
21 |
22 | ```
23 | Buckets/radiology/Collection/IRB41449/Entity/GL664ba0/GL664ba0_20070904_GL71cfb7.tar.gz
24 | Buckets/<bucket>/Collection/<collection>/Entity/<entity>/<images>.tar.gz
25 | ```
26 |
27 | In the above, the final compressed object is all images for a single study, and this may change to include another level of study identifier, and single dicoms.
28 |
29 | When the upload to Storage is done, we receive back metadata about its location. This additional metadata, along with the item metadata in `items` is then uploaded to BigQuery. This means that we have a nice strategy for searching very detailed fields (BigQuery) to get direct links to items (Storage). The dicom schema used by the som-tools looks like this:
30 |
31 | 
32 |
33 |
34 | ## Metadata
35 | We then create a collection, which in BigQuery corresponds to a Dataset, and we name it based on `GOOGLE_STORAGE_COLLECTION`, which should be an IRB number. If it already exists, it is simply retrieved. We then create a table in the collection called "dicom" to indicate dicom images (or possibly compressed dicom?). The general idea behind a metadata database is to provide rough, high level searchable fields that a researcher would be interested in, such as the age and gender, and the upload agent. We could add additional metadata here.
36 |
37 |
38 | ### Query in Console
39 | If you are using the Google Cloud Console, here are some helpful queries:
40 |
41 | ```
42 | SELECT * FROM IRB41449:Collection.__TABLES_SUMMARY__;
43 | ```
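And, following the same conventions (a guess at the table reference, based on the dataset and `dicom` table described above):

```
SELECT * FROM [IRB41449:Collection.dicom] LIMIT 10;
```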
44 |
--------------------------------------------------------------------------------
/sendit/apps/main/templates/batch/batch_details.html:
--------------------------------------------------------------------------------
1 | {% extends "base/base.html" %}
2 | {% block content %}
3 |
74 |
75 | {% endblock %}
76 |
--------------------------------------------------------------------------------
/sendit/apps/api/views.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2017 Vanessa Sochat
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | SOFTWARE.
21 |
22 | '''
23 |
24 |
25 | from django.http import (
26 | Http404,
27 | JsonResponse,
28 | HttpResponse
29 | )
30 |
31 | from django.template import RequestContext
32 | from django.shortcuts import render, render_to_response
33 | from django.http import JsonResponse
34 | import hashlib
35 |
36 | from sendit.settings import API_VERSION as APIVERSION
37 | from sendit.apps.api.utils import get_size
38 | from sendit.apps.main.utils import get_database
39 | from sendit.apps.main.models import (
40 | Batch,
41 | Image
42 | )
43 |
44 | from rest_framework import viewsets, generics
45 | from rest_framework.response import Response
46 | from rest_framework.views import APIView
47 | from sendit.apps.api.serializers import (
48 | BatchSerializer,
49 | ImageSerializer
50 | )
51 |
52 | from django.contrib.auth.models import User
53 | from datetime import datetime, timedelta
54 | from glob import glob
55 | import json
56 |
57 | #########################################################################
58 | # GET
59 | # requests for information about reports and collections
60 | #########################################################################
61 |
62 | def api_view(request, api_version=None):
63 |     if api_version is None:
64 |         api_version = APIVERSION
65 |     context = {"api_version": api_version,
66 |                "active": "api"}
67 |     return render(request, 'routes/api.html', context)
68 |
69 |
70 | class BatchViewSet(viewsets.ReadOnlyModelViewSet):
71 |     '''A batch is a collection of images to be processed.
72 |     '''
73 |     queryset = Batch.objects.all().order_by('uid')
74 |     serializer_class = BatchSerializer
75 |
76 |
77 | class ImageViewSet(viewsets.ReadOnlyModelViewSet):
78 |     '''An image is one dicom image (belonging to a batch) to process.
79 |     '''
80 |     queryset = Image.objects.all().order_by('uid')
81 |     serializer_class = ImageSerializer
82 |
83 |
84 | def metrics_view(request):
85 |     '''simple metrics to expose for local user'''
86 |
87 |     base = get_database()
88 |     timestamp = datetime.today().strftime('%Y-%m-%d-%H:%M:%S')
89 |
90 |     batchlog = {'SEEN': Batch.objects.count(),
91 |                 'SENT': Batch.objects.filter(status="DONE").count(),
92 |                 'EMPTY': Batch.objects.filter(status="EMPTY").count(),
93 |                 'QUEUE': Batch.objects.filter(status="QUEUE").count()}
94 |
95 |     response = {"timestamp": timestamp,
96 |                 "data_root": base,
97 |                 "data_total": len(glob("%s/*" % base)),
98 |                 "batches": batchlog}
99 |
100 |     return JsonResponse(response)
101 |
102 |
103 |
104 | def gb_day(request=None, days=1):
105 |     '''show gb per N days for user. (Default is 1)'''
106 |
107 |     days = int(days)
108 |     days_ago = datetime.today() - timedelta(days=days)
109 |
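    # sum the size (GB) of every DONE batch whose FinishTime falls within the window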
110 |     total_gb = 0
111 |     for batch in Batch.objects.all():
112 |         if batch.status == "DONE":
113 |             if "FinishTime" in batch.qa:
114 |                 finish_time = datetime.fromtimestamp(batch.qa['FinishTime'])
115 |                 if finish_time > days_ago:
116 |                     size = get_size(batch)
117 |                     total_gb += size
118 |
119 |     gb_per_day = total_gb / days
120 |
121 |     response = {"timestamp": timestamp,
122 |                 "gb_per_day": gb_per_day,
123 |                 "days": days}
124 |
125 |     if request is None:
126 |         return json.dumps(response)
127 |     return JsonResponse(response)
128 |
--------------------------------------------------------------------------------
/docs/logging.md:
--------------------------------------------------------------------------------
1 | # Logging
2 |
3 | ## Google Sheets
4 | Sendit has a helper script that can be run with cron to update a Google Sheet at some
5 | frequency with GB/day. Note that this assumes the following headers:
6 |
7 | ```
8 | pipeline | start_date | end_date | duration (days) | G/day Getit | G/day Sendit
9 | 1 | 9/11/2017 | 9/18/2017 | 7 | 300 | 77.0
10 | ```
11 |
12 | The titles are not important; what matters is the order and indexes. If you change this standard,
13 | you should update the script [save_google_sheets.py](../scripts/save_google_sheets.py).
14 |
15 | ### 1. Set up Authentication
16 | You will need to generate an [OAuth2 token](https://developers.google.com/sheets/api/guides/authorizing) for sheets on the server. This should be saved
17 | to your server somewhere, the full file path accessible via the environment variable `GOOGLE_SHEETS_CREDENTIALS`.
18 |
19 | ```
20 | GOOGLE_SHEETS_CREDENTIALS=/path/to/client_secrets.json
21 | export GOOGLE_SHEETS_CREDENTIALS
22 | ```
23 |
24 | It's ultimately going to generate a credentials file under your `$HOME/.credentials` directory. If you have trouble doing this on the server, do it locally and transfer the file.
25 |
26 | ### 2. Set up Cron
27 | Running the script comes down to adding a line to crontab. This is NOT on the server (host) but
28 | inside the image. Remember in the Dockerfile we installed crontab as follows:
29 |
30 | ```
31 | # Install crontab to setup job
32 | apt-get update && apt-get install -y gnome-schedule
33 | ```
34 |
35 | You then want to edit the script [save_google_sheets.sh](../scripts/save_google_sheets.sh) to include
36 | the specific sheet id. We take this approach (instead of adding it to crontab) so that if we need to
37 | change the call, we don't need to edit crontab. Then we echo the line to crontab, and this command
38 | will ensure it happens nightly at midnight (feel free to change the frequency):
39 |
40 | ```
41 | echo "0 0 * * * /bin/bash /code/scripts/save_google_sheets.sh" >> /code/cronjob
42 | crontab /code/cronjob
43 | ```
44 |
45 | The script uses the simple sheets client [provided by som-tools](https://github.com/vsoch/som/blob/master/som/api/google/sheets/client.py#L44), and adds an extra check to make sure column headers have not changed.
46 | If a change is found, the new row isn't added, on the assumption that the sheet format has changed.
47 |
48 | ## Internal Logging
49 | The application has a simple logger, defined at [../sendit/logger.py](logger.py). To use it, you import as follows:
50 |
51 | ```
52 | from sendit.logger import bot
53 | ```
54 |
55 | and then issue messages at whatever level is suitable for the message:
56 |
57 | ```
58 | bot.abort("This is an abort message")
59 | bot.error("This is an error message")
60 | bot.warning("This is a warning message")
61 | bot.log("This is a log message")
62 |
64 | bot.info("This is an info message")
65 | bot.verbose("This is regular verbose")
66 | bot.verbose2("This is level 2 verbose")
67 | bot.verbose3("This is level 3 verbose")
68 | bot.debug("This is a debug message")
69 | ```
70 |
71 | All logger commands print their level by default, with two exceptions: info, which prints like a plain console message (usually intended for the user), and quiet, which is not used in code but can be set by the user to suppress all output.
72 |
73 | ## Errors
74 | You can inspect errors via the batch view [interface](interface.md) or from the command line. To look for errors across all batches:
75 |
76 | ```
77 | python manage.py batch_logs
78 | There are no batches with error.
79 | ```
80 |
81 | and to select one or more specific batches based on their id (the number associated with the url in the browser, or the `batch.id` as a variable):
82 |
83 | ```
84 | python manage.py batch_logs 1
85 | DEBUG Inspecting for errors for 1 batch ids
86 | There are no batches with error.
87 |
88 | python manage.py batch_logs 1 2
89 | DEBUG Inspecting for errors for 2 batch ids
90 | There are no batches with error.
91 | ```
92 |
93 |
94 | ## Settings
95 | By default, the logger will have `debug` mode, which coincides with a level of `5`. You can customize this level at any point by setting the environment variable `SENDIT_MESSAGELEVEL`. In your `secrets.py` this might look like this:
96 |
97 |
98 | ```
99 | import os
100 | os.environ['SENDIT_MESSAGELEVEL'] = '2'
101 | ```
102 |
103 | The levels supported include the following:
104 |
105 | - ABRT = -4
106 | - ERROR = -3
107 | - WARNING = -2
108 | - LOG = -1
109 | - QUIET = 0
110 | - INFO = 1
111 | - VERBOSE = 2
112 | - VERBOSE2 = 3
113 | - VERBOSE3 = 4
114 | - DEBUG = 5
115 |
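For example, with the level set to `2` (VERBOSE), any call above that level is suppressed. A minimal illustration, assuming the semantics above (a message prints when its level is at or below `SENDIT_MESSAGELEVEL`):

```
import os
os.environ['SENDIT_MESSAGELEVEL'] = '2'

from sendit.logger import bot
bot.error("shown: ERROR (-3) <= 2")
bot.verbose("shown: VERBOSE (2) <= 2")
bot.verbose2("hidden: VERBOSE2 (3) > 2")
bot.debug("hidden: DEBUG (5) > 2")
```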
116 |
117 | The logger could also write its output to file, or do something else, but it isn't currently configured to do anything other than the above.
118 |
--------------------------------------------------------------------------------
/docs/start.md:
--------------------------------------------------------------------------------
1 | # Start the Application
2 | After configuration is done and you have a good understanding of how things work, you are ready to turn it on! You have two options: using the watcher (better for on-demand, streamed processing) or a cached queue (better if many datasets are already present). For both, an important note is that each job added to the queue for dicom import also handles the complete processing for that job. This is so that we don't have multiple tasks in the queue relevant
3 | to the same job (for example, imagine a queue of 1000, and adding the "next step" for the first
4 | item to the end: we wouldn't process it until the other 999 are started, and our disk might run
5 | out of space).
6 |
7 | ## Cached Queue
8 | This approach adds jobs to a queue, and they are processed when workers are available. This is a slightly longer process since it needs to read the filesystem, but it's only run when the
9 | previous set of folders found and queued is empty (meaning no Batch objects with status `QUEUE`).
10 | A cached queue is NOT processed by way of the watcher, but instead with the `python manage.py start_queue` command:
11 |
12 | ```
13 | python manage.py start_queue
14 | ```
15 |
16 | Optionally, you can provide the following arguments:
17 |
18 | ```
19 | --number: a max count to add to the queue
20 | --subfolder: optionally, a subfolder to use assumed in /data, to take preference
21 | ```
22 |
23 | Without any arguments, it iterates over the base folders defined in `DATA_INPUT_FOLDERS` to create the cache:
24 |
25 | ```
26 | DATA_INPUT_FOLDERS=['/data/1_%s' % x for x in range(8)] # /data/1_0 through /data/1_7
27 | ```
28 |
29 | The cache will not be generated until the current set is done and processed.
30 |
31 |
32 | ## Streaming with Watcher
33 | The watcher is intended to be used for streaming data. Folders are looked for under the `DATA_BASE`, and optionally in a specific subfolder, if defined:
34 |
35 |
36 | ```
37 | # Optionally, parse a subfolder under /data, or set to None
38 | DATA_SUBFOLDER="1_6"
39 | ```
40 |
41 | First, let's learn about how to start and stop the watcher, and the kind of datasets and location that the watcher is expecting. It is up to you to plop these dataset folders into the application's folder being watched.
42 |
43 | ## 1. Running the Watcher
44 | This initial setup is intentionally simple: it checks an input folder to find new images. We do this using the [watcher](../sendit/apps/watcher) application, which is started and stopped with a manage.py command:
45 |
46 | ```
47 | python manage.py start_watcher
48 | python manage.py stop_watcher
49 | ```
50 |
51 | And the default is to watch for files added to [data](../data), which is mapped to '/data' in the container. Remember that you can change this mapping in the [docker-compose.yml](../docker-compose.yml). In terms of the strategy for receiving the folders, this is currently up to you, but the high level idea is that the application should receive DICOM from somewhere, using an atomic download strategy at the folder level, into the application's data input folder. This will mean that when it starts, the folder (inside the container) might look like:
52 |
53 |
54 | ```bash
55 | /data
56 | ST-000001.tmp2343
57 | image1.dcm
58 | image2.dcm
59 | image3.dcm
60 |
61 | ```
62 | Only when all of the dicom files are finished copying will the driving function rename it to be like this:
63 |
64 |
65 | ```bash
66 | /data
67 | ST-000001
68 | image1.dcm
69 | image2.dcm
70 | image3.dcm
71 |
72 | ```
73 |
74 | A directory is considered "finished" and ready for processing when it does **not** have an extension that starts with "tmp". For more details about the watcher daemon, you can look at [its docs](watcher.md). While many examples are provided, for this application we use the celery task `import_dicomdir` in [main/tasks.py](../sendit/apps/main/tasks.py) to read in a finished dicom directory from the directory being watched, and this uses the class `DicomCelery` in the [event_processors](../sendit/apps/watcher/event_processors.py) file. Other examples are provided, in case you want to change or extend the watcher daemon. For complete details about the import of dicom files, see [dicom_import.md](dicom_import.md).
75 |
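For example, a transfer script on the host might implement this hand-off as follows (the paths and folder names are hypothetical; this script is not part of sendit):

```
import os
import shutil

src = "/incoming/ST-000001"                     # hypothetical upstream location
tmp = "/data/ST-000001.tmp%s" % os.getpid()     # "tmp" extension: ignored by the watcher

shutil.copytree(src, tmp)                       # copy while invisible to the watcher
os.rename(tmp, "/data/ST-000001")               # atomic rename: the watcher picks it up
```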
76 |
77 | ## 2. Database Models
78 | The Dockerized application is constantly monitoring the folder to look for folders that are not in the process of being populated. When a folder is found:
79 |
80 | - A new object in the database is created to represent the "Batch"
81 | - Each "Image" is represented by an equivalent object
82 | - Each "Image" is linked to its "Batch"
83 | - Currently, all uids for each must be unique.
84 |
85 | Generally, the query of interest will retrieve a set of images with an associated accession number, and the input folder will be named by the accession number. Since there is variance in the data with regard to `AccessionNumber` and different series identifiers, we give batches ids based on the folder name.
86 |
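As a rough sketch (not the actual import code; see [dicom_import.md](dicom_import.md) for the real flow), the bookkeeping looks like:

```
import os
from glob import glob
from sendit.apps.main.models import Batch, Image

folder = "/data/ST-000001"                      # a finished (non-tmp) folder
batch, _ = Batch.objects.get_or_create(uid=os.path.basename(folder))
for dcm_file in glob("%s/*.dcm" % folder):
    # each dicom becomes an Image linked to its Batch; uids must be unique
    Image.objects.get_or_create(uid=os.path.basename(dcm_file), batch=batch)
```
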
87 | Now that the application is started, you can learn about usage, starting with the [manager](manager.md), or check out details about the simple [interface](interface.md).
88 |
--------------------------------------------------------------------------------
/sendit/apps/main/static/css/papaya.css:
--------------------------------------------------------------------------------
1 | .papaya{width:90%;height:90%;margin:25px auto;background-color:black;font-family:sans-serif}.papaya:before{position:relative;content:"Papaya requires JavaScript...";display:block;top:45%;color:red;margin:0 auto;font-size:18px;font-family:sans-serif}.papaya-fullscreen{height:100%}.papaya-toolbar{text-align:left;box-sizing:content-box}.papaya-toolbar ul{margin:0;list-style:none}.papaya-toolbar input[type=file]{text-align:right;display:none}.papaya-kiosk-controls{margin:5px auto;list-style:none;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.papaya-kiosk-controls ul{list-style:none}.papaya-kiosk-controls button{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;background-color:lightgray}.papaya-control-increment{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;width:25px;text-align:center;vertical-align:middle;padding:0;margin-left:auto;margin-right:auto;line-height:16px;box-sizing:border-box;font-family:"Courier New",Courier,monospace}.papaya-main-increment{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;width:25px;text-align:center;font-family:"Courier New",Courier,monospace;background-color:lightgray;vertical-align:middle;padding:0;margin-left:auto;margin-right:auto;box-sizing:border-box;outline:0}.papaya-main-decrement{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;width:25px;text-align:center;font-family:"Courier New",Courier,monospace;background-color:lightgray;vertical-align:middle;padding:0;margin-left:auto;margin-right:auto;box-sizing:border-box;outline:0}.papaya-main-swap{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;background-color:lightgray;outline:0}.papaya-main-goto-center{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;background-color:lightgray;outline:0}.papaya-main-goto-origin{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;font-size:14px;height:25px;background-color:lightgray;outline:0}.papaya-slider-slice{padding:0 5px;display:inline}.papaya-slider-slice span{font-size:14px;font-family:sans-serif;vertical-align:middle}.papaya-slider-slice button{-webkit-appearance:none;border-radius:0;-webkit-border-radius:0;vertical-align:middle;font-size:14px;height:25px;background-color:lightgray}.papaya-controlbar-label{color:#000}.papaya-menu{width:220px;background:#222;z-index:100;position:absolute;border:solid 2px darkgray;padding:4px;margin:0}.papaya-menu li{font-size:12px;font-family:sans-serif;padding:4px 2px;color:#b5cbd3;cursor:pointer;list-style-type:none}.papaya-menu-label{font-size:14px;font-family:sans-serif;font-weight:bold;padding:2px 8px;cursor:pointer;vertical-align:text-bottom}.papaya-menu-titlebar{font-size:16px;font-family:sans-serif;padding:3px 8px 0 8px;cursor:default;vertical-align:text-bottom;color:white}.papaya-menu-icon{margin-left:5px}.papaya-menu-icon 
img{box-sizing:content-box}.papaya-menu-hovering{background-color:#444}.papaya-menu-spacer{height:8px}.papaya-menu-unselectable{-moz-user-select:-moz-none;-khtml-user-select:none;-webkit-user-select:none;-ms-user-select:none;user-select:none;-webkit-user-drag:none;user-drag:none}.papaya-menu-button-hovering{background-color:#DDD}.papaya-menu-filechooser{cursor:pointer;width:200px;display:inline-block;font-weight:normal}.papaya-menu-input{width:38px;margin-right:5px;color:black}li .papaya-menu-slider{vertical-align:middle;text-align:center;display:inline;width:120px;padding:0;margin:0}.papaya-dialog{min-width:400px;max-width:500px;height:500px;background:#222;position:absolute;z-index:100;border:solid 2px darkgray;padding:6px;font-size:14px;font-family:sans-serif;color:#b5cbd3;box-sizing:content-box;line-height:1.45}.papaya-dialog-content{margin:20px;height:415px;color:#dedede;overflow:auto;-ms-overflow-style:-ms-autohiding-scrollbar}.papaya-dialog-content-nowrap{white-space:nowrap}.papaya-dialog-content table{margin:0 auto}.papaya-dialog-content-label{text-align:right;padding:5px;color:#b5cbd3}.papaya-dialog-content-control{text-align:left;padding:5px}.papaya-dialog-content-help{text-align:right;padding:5px;color:lightgray;font-size:12px}.papaya-dialog-title{color:#b5cbd3;font-weight:bold;font-size:16px}.papaya-dialog-button{text-align:right;box-sizing:content-box;height:22px}.papaya-dialog-button button{box-sizing:content-box;color:black;font-size:11px}.papaya-dialog-background{position:fixed;top:0;left:0;background-color:#fff;width:100%;height:100%;opacity:.5}.papaya-dialog-stopscroll{height:100%;overflow:hidden}.checkForJS{width:90%;height:90%;margin:25px auto;background-color:black}.checkForJS:before{position:relative;content:"Papaya requires JavaScript...";display:block;top:45%;color:red;margin:0 auto;font-size:18px;font-family:sans-serif;text-align:center}.papaya-utils-unsupported{width:90%;height:90%;margin:25px auto;background-color:black}.papaya-utils-unsupported-message{position:relative;display:block;top:45%;color:red;margin:0 auto;font-size:18px;font-family:sans-serif;text-align:center}.papaya-viewer{line-height:1;font-family:sans-serif}.papaya-viewer div,.papaya-viewer canvas{margin:0;padding:0;border:0;font:inherit;font-size:100%;vertical-align:baseline;font-family:sans-serif}.papaya-viewer canvas{cursor:crosshair}
--------------------------------------------------------------------------------
/sendit/apps/watcher/commands.py:
--------------------------------------------------------------------------------
1 | '''
2 | commands for the watcher. When called from a management command, set as_command=True
3 | so that errors are raised as a CommandError
4 |
5 | Copyright (c) 2017 Vanessa Sochat
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
25 | :: note
26 | For these functions, if as_command is
27 |     True, we assume this is coming from the terminal (and not the web interface).
28 |     If False, errors are sent back to the calling user via messages in the
29 |     request. If there is an error, the status returned is None, and a message
30 |     is returned to the user to indicate why. If successful, and the call is from
31 | the application, the notifier is returned.
32 |
33 | '''
34 | from django.contrib import messages
35 | from sendit.logger import bot
36 | from sendit.apps.watcher.utils import (
37 | get_daemon_kwargs,
38 | get_notifier,
39 | get_pid_file,
40 | verify_monitor_paths,
41 | watcher_error,
42 | watcher_message
43 | )
44 |
45 | from django.conf import settings
46 | import os
47 | import time
48 |
49 |
50 | def start_watcher(request=None,as_command=False):
51 | '''start the watcher, if the process is not started.
52 | '''
53 |
54 | # Verify INOTIFIER_WATCH_PATHS is defined and non-empty
55 | try:
56 | assert settings.INOTIFIER_WATCH_PATHS
57 | except (AttributeError, AssertionError):
58 |         return watcher_error(message="Missing/empty settings/watcher.py INOTIFIER_WATCH_PATHS",
59 | as_command=as_command,
60 | request=request)
61 |
62 |
63 | # Verify INOTIFIER_WATCH_PATHS is properly formatted
64 | try:
65 | length_3 = [len(tup) == 3 for tup in settings.INOTIFIER_WATCH_PATHS]
66 | assert all(length_3)
67 | except AssertionError:
68 | message = '''setting INOTIFIER_WATCH_PATHS should be an iterable of
69 |         3-tuples of the form [ ("/path1/", <event mask>, <processor class path>), ]'''
70 | return watcher_error(message=message,
71 | as_command=as_command,
72 | request=request)
73 |
74 |
75 | error_message = verify_monitor_paths(return_message=True)
76 | if error_message is not None:
77 | return watcher_error(message=error_message,
78 | as_command=as_command,
79 | request=request)
80 |
81 |
82 | # Setup watches using pyinotify
83 | notifier = get_notifier()
84 |
85 | # Error with import or setup returns None
86 | if notifier is None:
87 | return watcher_error(message="Cannot import pyinotify.",
88 | as_command=as_command,
89 | request=request)
90 |
91 | pid_file = get_pid_file()
92 |
93 | # Daemonize, killing any existing process specified in pid file
94 | daemon_kwargs = get_daemon_kwargs()
95 | notifier.loop(daemonize=True, pid_file=pid_file, **daemon_kwargs)
96 | watcher_message(message="Dicom watching has been started.",request=request)
97 |
98 |
99 |
100 | def stop_watcher(request=None,as_command=False):
101 | '''stop the watcher, if the process is started. Returns True
102 | if success.
103 | '''
104 |
105 | pid_file = get_pid_file()
106 |
107 | if os.path.exists(pid_file):
108 | pid = int(open(pid_file).read())
109 |
110 | import signal
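        # ask the daemon to shut down gracefully (SIGHUP); the SIGKILL below is a fallback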
111 | try:
112 | os.kill(pid, signal.SIGHUP)
113 | except OSError:
114 | os.remove(pid_file)
115 | # This needs testing - shouldn't normally trigger when stopped
116 |             return watcher_error(message="Cleaned up pid file %s" %(pid_file),
117 | as_command=as_command,
118 | request=request)
119 | time.sleep(2)
120 |
121 | try:
122 | os.kill(pid, signal.SIGKILL)
123 | except OSError:
124 | pass
125 |
126 | os.remove(pid_file)
127 | watcher_message(message="Dicom watching has been stopped.",request=request)
128 | else:
129 |         return watcher_error(message="No pid file exists. The watcher is not running.",
130 | as_command=as_command,
131 | request=request)
132 | if not as_command:
133 | return True
134 |
--------------------------------------------------------------------------------
/sendit/settings/main.py:
--------------------------------------------------------------------------------
1 | '''
2 | Django settings for sendit project.
3 |
4 | Copyright (c) 2017 Vanessa Sochat
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
25 |
26 | '''
27 |
28 |
29 | import os
30 |
31 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
32 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
33 |
34 | DOMAIN_NAME = "https://send.it"
35 | DOMAIN_NAME_HTTP = "http://send.it"
36 | ADMINS = (('vsochat', 'vsochat@stanford.com'),)
37 | MANAGERS = ADMINS
38 |
39 | ALLOWED_HOSTS = ["*"]
40 |
41 | # Application definition
42 |
43 |
44 | MIDDLEWARE_CLASSES = [
45 | 'django.middleware.security.SecurityMiddleware',
46 | 'django.contrib.sessions.middleware.SessionMiddleware',
47 | 'django.middleware.common.CommonMiddleware',
48 | 'django.middleware.csrf.CsrfViewMiddleware',
49 | 'opbeat.contrib.django.middleware.OpbeatAPMMiddleware',
50 | 'django.contrib.auth.middleware.AuthenticationMiddleware',
51 | 'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
52 | 'django.contrib.messages.middleware.MessageMiddleware',
53 | 'django.middleware.clickjacking.XFrameOptionsMiddleware',
54 | 'lockdown.middleware.LockdownMiddleware',
55 | ]
56 |
57 | ROOT_URLCONF = 'sendit.urls'
58 |
59 | TEMPLATES = [
60 | {
61 | 'BACKEND': 'django.template.backends.django.DjangoTemplates',
62 | 'DIRS': [],
63 | 'APP_DIRS': True,
64 | 'OPTIONS': {
65 | 'context_processors': [
66 | 'django.template.context_processors.debug',
67 | 'django.template.context_processors.request',
68 | 'django.contrib.auth.context_processors.auth',
69 | 'django.contrib.messages.context_processors.messages',
70 | 'sendit.apps.base.context_processors.domain_processor', #custom context processor
71 | ],
72 | },
73 | },
74 | ]
75 |
76 | TEMPLATES[0]['OPTIONS']['debug'] = True
77 | WSGI_APPLICATION = 'sendit.wsgi.application'
78 |
79 |
80 | # Database
81 | # https://docs.djangoproject.com/en/1.9/ref/settings/#databases
82 |
83 | DATABASES = {
84 | 'default': {
85 | 'ENGINE': 'django.db.backends.postgresql_psycopg2',
86 | 'NAME': 'postgres',
87 | 'USER': 'postgres',
88 | 'HOST': 'db',
89 | 'PORT': '5432',
90 | }
91 | }
92 |
93 |
94 | # Password validation
95 | # https://docs.djangoproject.com/en/1.9/ref/settings/#auth-password-validators
96 |
97 | AUTH_PASSWORD_VALIDATORS = [
98 | {
99 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
100 | },
101 | {
102 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
103 | },
104 | {
105 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
106 | },
107 | {
108 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
109 | },
110 | ]
111 |
112 | # Api
113 | API_VERSION = "v1"
114 |
115 | REST_FRAMEWORK = {
116 |
117 | #'DEFAULT_PERMISSION_CLASSES': [
118 | # 'rest_framework.permissions.IsAuthenticated',
119 | #],
120 | 'DEFAULT_AUTHENTICATION_CLASSES': (
121 | 'rest_framework.authentication.TokenAuthentication',
122 | ),
123 |
124 | 'PAGE_SIZE': 10
125 | }
126 |
127 |
128 | # Internationalization
129 | # https://docs.djangoproject.com/en/1.9/topics/i18n/
130 |
131 | LANGUAGE_CODE = 'en-us'
132 | TIME_ZONE = 'America/Chicago'
133 | USE_I18N = True
134 | USE_L10N = True
135 | USE_TZ = True
136 |
137 | SESSION_SERIALIZER = 'django.contrib.sessions.serializers.PickleSerializer'
138 |
139 | SENDFILE_BACKEND = 'sendfile.backends.development'
140 | PRIVATE_MEDIA_REDIRECT_HEADER = 'X-Accel-Redirect'
141 | CRISPY_TEMPLATE_PACK = 'bootstrap3'
142 |
143 | CACHES = {
144 | 'default': {
145 | 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
146 | }
147 | }
148 |
149 | # Static files (CSS, JavaScript, Images)
150 | # https://docs.djangoproject.com/en/1.9/howto/static-files/
151 |
152 | MEDIA_ROOT = '/var/www/images'
153 | MEDIA_URL = '/images/'
154 | STATIC_ROOT = '/var/www/static'
155 | STATIC_URL = '/static/'
156 |
157 | # Gravatar
158 | GRAVATAR_DEFAULT_IMAGE = "retro"
159 |
--------------------------------------------------------------------------------
/sendit/apps/base/static/js/paper-dashboard.js:
--------------------------------------------------------------------------------
1 | /*!
2 |
3 | =========================================================
4 | * Paper Dashboard - v1.1.2
5 | =========================================================
6 |
7 | * Product Page: http://www.creative-tim.com/product/paper-dashboard
8 | * Copyright 2017 Creative Tim (http://www.creative-tim.com)
9 | * Licensed under MIT (https://github.com/creativetimofficial/paper-dashboard/blob/master/LICENSE.md)
10 |
11 | =========================================================
12 |
13 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 |
15 | */
16 |
17 |
18 | var fixedTop = false;
19 | var transparent = true;
20 | var navbar_initialized = false;
21 |
22 | $(document).ready(function(){
23 | window_width = $(window).width();
24 |
25 | // Init navigation toggle for small screens
26 | if(window_width <= 991){
27 | pd.initRightMenu();
28 | }
29 |
30 | // Activate the tooltips
31 | $('[rel="tooltip"]').tooltip();
32 |
33 | });
34 |
35 | // activate collapse right menu when the windows is resized
36 | $(window).resize(function(){
37 | if($(window).width() <= 991){
38 | pd.initRightMenu();
39 | }
40 | });
41 |
42 | pd = {
43 | misc:{
44 | navbar_menu_visible: 0
45 | },
46 | checkScrollForTransparentNavbar: debounce(function() {
47 | if($(document).scrollTop() > 381 ) {
48 | if(transparent) {
49 | transparent = false;
50 | $('.navbar-color-on-scroll').removeClass('navbar-transparent');
51 | $('.navbar-title').removeClass('hidden');
52 | }
53 | } else {
54 | if( !transparent ) {
55 | transparent = true;
56 | $('.navbar-color-on-scroll').addClass('navbar-transparent');
57 | $('.navbar-title').addClass('hidden');
58 | }
59 | }
60 | }),
61 | initRightMenu: function(){
62 | if(!navbar_initialized){
63 | $off_canvas_sidebar = $('nav').find('.navbar-collapse').first().clone(true);
64 |
65 | $sidebar = $('.sidebar');
66 | sidebar_bg_color = $sidebar.data('background-color');
67 | sidebar_active_color = $sidebar.data('active-color');
68 |
69 | $logo = $sidebar.find('.logo').first();
70 | logo_content = $logo[0].outerHTML;
71 |
72 | ul_content = '';
73 |
74 | // set the bg color and active color from the default sidebar to the off canvas sidebar;
75 | $off_canvas_sidebar.attr('data-background-color',sidebar_bg_color);
76 | $off_canvas_sidebar.attr('data-active-color',sidebar_active_color);
77 |
78 | $off_canvas_sidebar.addClass('off-canvas-sidebar');
79 |
80 | //add the content from the regular header to the right menu
81 | $off_canvas_sidebar.children('ul').each(function(){
82 | content_buff = $(this).html();
83 | ul_content = ul_content + content_buff;
84 | });
85 |
86 | // add the content from the sidebar to the right menu
87 | content_buff = $sidebar.find('.nav').html();
88 | ul_content = ul_content + ''+ content_buff;
89 |
90 |             ul_content = '<ul class="nav navbar-nav">' + ul_content + '</ul>';
91 |
92 |             // prepend the logo markup to the combined menu content
93 |             navbar_content = logo_content + ul_content;
94 |
95 | $off_canvas_sidebar.html(navbar_content);
96 |
97 | $('body').append($off_canvas_sidebar);
98 |
99 | $toggle = $('.navbar-toggle');
100 |
101 | $off_canvas_sidebar.find('a').removeClass('btn btn-round btn-default');
102 | $off_canvas_sidebar.find('button').removeClass('btn-round btn-fill btn-info btn-primary btn-success btn-danger btn-warning btn-neutral');
103 | $off_canvas_sidebar.find('button').addClass('btn-simple btn-block');
104 |
105 | $toggle.click(function (){
106 | if(pd.misc.navbar_menu_visible == 1) {
107 | $('html').removeClass('nav-open');
108 | pd.misc.navbar_menu_visible = 0;
109 | $('#bodyClick').remove();
110 | setTimeout(function(){
111 | $toggle.removeClass('toggled');
112 | }, 400);
113 |
114 | } else {
115 | setTimeout(function(){
116 | $toggle.addClass('toggled');
117 | }, 430);
118 |
119 |                 div = '<div id="bodyClick"></div>';
120 | $(div).appendTo("body").click(function() {
121 | $('html').removeClass('nav-open');
122 | pd.misc.navbar_menu_visible = 0;
123 | $('#bodyClick').remove();
124 | setTimeout(function(){
125 | $toggle.removeClass('toggled');
126 | }, 400);
127 | });
128 |
129 | $('html').addClass('nav-open');
130 | pd.misc.navbar_menu_visible = 1;
131 |
132 | }
133 | });
134 | navbar_initialized = true;
135 | }
136 |
137 | }
138 | }
139 |
140 |
141 | // Returns a function, that, as long as it continues to be invoked, will not
142 | // be triggered. The function will be called after it stops being called for
143 | // N milliseconds. If `immediate` is passed, trigger the function on the
144 | // leading edge, instead of the trailing.
145 |
146 | function debounce(func, wait, immediate) {
147 | var timeout;
148 | return function() {
149 | var context = this, args = arguments;
150 | clearTimeout(timeout);
151 | timeout = setTimeout(function() {
152 | timeout = null;
153 | if (!immediate) func.apply(context, args);
154 | }, wait);
155 | if (immediate && !timeout) func.apply(context, args);
156 | };
157 | };
158 |
--------------------------------------------------------------------------------
/sendit/apps/watcher/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | utilities for the watcher
3 |
4 | Copyright (c) 2017 Vanessa Sochat
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 | '''
25 |
26 | from django.contrib.auth.decorators import login_required
27 | from django.contrib.auth.models import User
28 | from sendit.logger import bot
29 | from django.contrib import messages
30 |
31 | from django.core.management.base import (
32 | CommandError
33 | )
34 |
35 | from sendit.settings import (
36 | BASE_DIR,
37 | MEDIA_ROOT
38 | )
39 | from django.conf import settings
40 | import os
41 |
42 | media_dir = os.path.join(BASE_DIR,MEDIA_ROOT)
43 |
44 |
45 | def get_level():
46 |     '''get level will return the import level for __import__
47 |     based on the user's python version (0 for python 3, -1 for python 2)
48 |     '''
49 | import six
50 | if six.PY3:
51 | return 0
52 | else:
53 | return -1
54 |
55 | def get_daemon_kwargs():
56 | '''returns the stderr and stdout log file locations
57 | for the daemon based on the user configuration settings'''
58 | daemon_kwargs = {}
59 | try:
60 | daemon_kwargs['stdout'] = settings.INOTIFIER_DAEMON_STDOUT
61 | except AttributeError:
62 | pass
63 |
64 | try:
65 | daemon_kwargs['stderr'] = settings.INOTIFIER_DAEMON_STDERR
66 | except AttributeError:
67 | pass
68 | return daemon_kwargs
69 |
70 |
71 | def is_watching():
72 |     '''is_watching returns the status of the watcher based on
73 |     the active process read in from the pid file.
74 | '''
75 | pid_file = get_pid_file(quiet=True)
76 | if os.path.exists(pid_file):
77 | pid = int(open(pid_file).read())
78 | try:
79 | os.kill(pid, 0)
80 | except OSError:
81 | return False
82 | return True
83 | else:
84 | return False
85 |
86 |
87 | def get_pid_file(quiet=False):
88 | '''get_pid_file will return a path to write the pid file,
89 | based on the configuration (user settings)
90 | '''
91 | try:
92 | pid_file = os.path.join(settings.BASE_DIR, 'watcher.pid')
93 | except AttributeError:
94 | pid_file = os.path.join("/tmp", "watcher.pid")
95 | if not quiet:
96 | if os.path.exists(pid_file):
97 | bot.debug("pid file is at %s" %(pid_file))
98 | else:
99 | bot.debug("pid file set to %s" %(pid_file))
100 |
101 | return pid_file
102 |
103 |
104 | def get_notifier():
105 | '''get notifier will return a basic pyinotify watch manager
106 | based on the user's inotify watch paths in settings.
107 | if there is an error, returns None.
108 | '''
109 |
110 | try:
111 | import pyinotify
112 | except ImportError as e:
113 | bot.error("pyinotify is not installed.")
114 | return None
115 |
116 | level = get_level()
117 | wm = pyinotify.WatchManager()
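    # import each processor class from its dotted path and register it as the handler for this watch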
118 | for path, mask, processor_cls in settings.INOTIFIER_WATCH_PATHS:
119 | cls_path = '.'.join(processor_cls.split('.')[0:-1])
120 | cls = processor_cls.split('.')[-1]
121 | mod = __import__(cls_path, globals(), locals(), [cls], level)
122 | Processor = getattr(mod, cls)
123 | wm.add_watch(path, mask, proc_fun=Processor())
124 | bot.debug("Adding watch on %s, processed by %s" %(path, processor_cls))
125 |
126 | notifier = pyinotify.Notifier(wm)
127 | return notifier
128 |
129 | def verify_monitor_paths(return_message=False):
130 | '''verify monitor paths will check for monitor paths. If return_message is
131 | True, it returns the error message for another process to call/deal with, and None
132 | if all is good. If return message is False (default) it triggers the error.
133 | '''
134 | level = get_level()
135 |
136 | # Verify monitor_paths exists and processor classes can be imported
137 | for monitor, m, processor_cls in settings.INOTIFIER_WATCH_PATHS:
138 | if not os.path.exists(monitor):
139 | message="%s does not exist or you have insufficient permission" % monitor
140 | if return_message:
141 | return message
142 | return watcher_error(message=message,as_command=True)
143 |
144 | path = '.'.join(processor_cls.split('.')[0:-1])
145 | cls = processor_cls.split('.')[-1]
146 | try:
147 | mod = __import__(path, globals(), locals(), [cls], level)
148 | getattr(mod, cls)
149 | except ImportError as e:
150 | message='Cannot import event processor module: %s\n\n%s' %(path,e)
151 | if return_message:
152 | return message
153 | return watcher_error(message=message,as_command=True)
154 |
155 |         except AttributeError as e:
156 |             message='Cannot import event processor module: %s\n\n%s' %(path,e)
157 | if return_message:
158 | return message
159 | return watcher_error(message=message,as_command=True)
160 |
161 | return None
162 |
163 |
164 | def watcher_error(message,as_command,request=None):
165 | '''watcher_error will take the setting as_command (True means the call is from the command line,
166 | false means it is from a web application view) and a request (from a web view) and either issue
167 | a command error, or return None response, optionally with a message, to the calling function
168 | '''
169 | if as_command:
170 | raise CommandError(message)
171 | else:
172 |         watcher_message(message,request=request)
173 | return None
174 |
175 |
176 | def watcher_message(message,request=None):
177 |     '''if request is defined, a message is added to it; otherwise it is logged.
178 | '''
179 | if request is not None:
180 | messages.info(request,message)
181 | else:
182 | bot.debug(message)
183 |
--------------------------------------------------------------------------------
/docs/application.md:
--------------------------------------------------------------------------------
1 | # Application
2 |
3 | ## Overview
4 | Sendit generally works as follows:
5 |
6 | 1. Datasets are mapped to the application container `/data` folder, and are processed in batches. Each folder associated with an accession number is considered a batch.
7 | 2. Header data is extracted for cleaning using the [deid](https://www.github.com/pydicom/deid) module. Sendit stores key/values for header data.
8 | 3. Sendit uses the [stanford open modules](https://www.github.com/vsoch/som) to interact with the Stanford (DASHER) API and retrieve anonymous ids for the data.
9 | 4. The data structure with headers is updated with the identifiers from DASHER, and the images are anonymized using this data structure per a specific customization defined in a deid recipe.
10 | 5. The final images and metadata are uploaded to Google Cloud again using the [stanford open modules](https://www.github.com/vsoch/som).
11 |
12 | From the above, you can see that sendit is akin to a glue to hold several APIs and customizations together.
13 |
14 | ## Sendit Containers
15 |
16 | This application lives in a docker-compose orchestration of images running on `STRIDE-HL71`. This application has the following components (each a Docker image):
17 |
18 | - **uwsgi**: is the main python application with Django (python)
19 | - **postgres**: is a postgres image database
20 | - **nginx**: is a web server to make a status web interface for Research IT
21 | - **worker**: is the same image as uwsgi, but configured to run a distributed job queue called [celery](http://www.celeryproject.org/).
22 | - **redis**: is the database used by the worker, with serialization in json.
23 |
24 |
25 | ## Job Queue
26 |
27 | ### Step 1: Start Queue
28 | The job queue accepts a manual request to import one or more dicom directories, subfolders under `/data`. We call it a "queue" because it is handled by the worker and redis images, where the worker is a set of threads that can process multiple (~16) batches at once, and redis is the database that manages the queue. The queue can "pile up" and the workers will process tasks when the server has available resources. Thus, to start the pipeline:
29 |
30 | 1. You should make sure your `DATA_INPUT_FOLDERS` are defined in [sendit/settings/config.py](../sendit/settings/config.py).
31 | 2. You should then start the queue, which performs dicom import, get_identifiers, and replace_identifiers (not upload). This means that images go from status "QUEUE" to "DONEPROCESSING".
32 |
33 | ```
34 | # Start the queue
35 | python manage.py start_queue
36 |
37 | # The defaults are max count 1, /data folder
38 | python manage.py start_queue --number 1 --subfolder /data
39 |
40 | ```
41 |
42 | When you call the above, the workers will do the following:
43 |
44 | 1. Check for any Batch objects with status "QUEUE," meaning they were added and not started yet. If there are none in the QUEUE (the default when you haven't used it yet!) then the function uses the `DATA_INPUT_FOLDERS` to find new "contenders." The contender folders each have a Batch created for them, and the Batch is given status QUEUE. We do this up to the max count provided by the "number" variable in the `start_queue` request above.
45 | 2. Up to the max count, the workers then launch the [import dicom](import_dicom.md) task to run async. This function changes the Batch status to "PROCESSING," imports the dicom, extracts header information, prepares/sends/receives a request for [anonymized identifiers](anonymize.md) from DASHER, and then saves a BatchIdentifiers objects. The Batch then is given status "DONEPROCESSING".
46 |
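In rough pseudocode, that flow looks like the sketch below (the settings import, folder discovery, and `dicom_dir` keyword are illustrative assumptions, not the actual management command internals):

```
import os
from glob import glob
from sendit.apps.main.models import Batch
from sendit.apps.main.tasks import import_dicomdir
from sendit.settings import DATA_INPUT_FOLDERS

number = 10                           # the --number argument
if Batch.objects.filter(status="QUEUE").count() == 0:
    # nothing queued: create QUEUE batches for new contender folders
    contenders = [f for base in DATA_INPUT_FOLDERS for f in glob("%s/*" % base)]
    for folder in contenders[:number]:
        Batch.objects.create(uid=os.path.basename(folder), status="QUEUE")

# hand each queued batch to a worker to run async
for batch in Batch.objects.filter(status="QUEUE")[:number]:
    import_dicomdir.apply_async(kwargs={"dicom_dir": batch.uid})
```
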
47 | It is expected that a set of folders (batches) will do these steps first, meaning that there are no Batches with status "QUEUE" and all are "DONEPROCESSING." We do this because we want to upload to storage in large batches to optimize using the client.
48 |
49 |
50 | ### Step 2: Upload to Storage
51 | When all Batches have status "DONEPROCESSING" we launch a second request to the application to upload to storage:
52 |
53 | ```
54 | python manage.py upload_finished
55 | ```
56 |
57 | This task looks for Batches that are "DONEPROCESSING" and distributes them equally among 10 workers. 10 is not a magic number, but one I found in testing to be a good balance that avoids the odd connection errors that likely come from using network resources inside a Docker container. Sending to storage means two steps:
58 |
59 | 1. Upload Images (compressed .tar.gz) to Google Storage, and receive back metadata about bucket locations
60 | 2. Send image metadata + storage metadata to BigQuery
61 |
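A sketch of that distribution, using the `chunks` helper from the task utilities (the `apply_async` dispatch here is illustrative, not the exact command internals):

```
from sendit.apps.main.models import Batch
from sendit.apps.main.tasks.finish import upload_storage
from sendit.apps.main.tasks.utils import chunks

done = [b.id for b in Batch.objects.filter(status="DONEPROCESSING")]
# split the finished batches into roughly 10 equal groups, one per worker
for group in chunks(done, max(1, len(done) // 10)):
    upload_storage.apply_async(kwargs={"batch_ids": group})
```
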
62 | If you are more interested in reading about the storage formats, read more about [storage](storage.md).
63 |
64 | ## Status
65 | In order to track the state of processing, we have status states for batches.
66 |
67 |
68 | ```
69 | BATCH_STATUS = (('QUEUE', 'The batch is queued and not picked up by worker.'),
70 | ('NEW', 'The batch was just added to the application.'),
71 | ('EMPTY', 'After processing, no images passed filtering.'),
72 | ('PROCESSING', 'The batch currently being processed.'),
73 | ('DONE','The batch is done, and images are ready for cleanup.'))
74 | ```
75 |
76 | You can use the command line manage.py to export a table of processing times and status:
77 |
78 | ```
79 | python manage.py export_metrics
80 | sendit-process-time-2017-08-26.tsv
81 | ```
82 |
83 | ## Errors
84 | The most likely error would be an inability to read a dicom file, which could happen for any number of reasons. This, and generally any errors that are triggered during the lifecycle of a batch, will flag the batch as having an error. The variable `has_error` is a boolean that belongs to a batch, and a matching JSONField `errors` will hold a list of errors for the user. This error flag will be most relevant during cleanup.
85 |
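For example, to list flagged batches from a `python manage.py shell` (a sketch; the task utilities store the error list under `batch.logs['errors']`):

```
from sendit.apps.main.models import Batch

for batch in Batch.objects.filter(has_error=True):
    print(batch.uid, batch.logs.get('errors', []))
```
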
86 | For server errors, the application is configured to be set up with Opbeat. @vsoch has an account that can handle Stanford deployed applications, and all others should follow instructions for setup [on the website](https://opbeat.com/researchapps). It comes down to adding a few lines to the [main settings](../sendit/settings/main.py). Opbeat (or a similar service) is essential for being notified immediately when any server error is triggered.
87 |
88 |
89 | ## Cleanup
90 | Upon completion, we will want some level of cleanup of both the database, and the corresponding files. It is already the case that the application moves the input files from `/data` into its own media folder (`images`), and cleanup might look like any of the following:
91 |
92 | - In the most ideal case, there are no errors or flags for the batch, and the database and media files are removed after successful upload to storage. Eventually we would want to delete the original files too. This application is not intended as an archive for data, but a node that filters data and passes it along.
93 | - Given an error during `dicom_import`, a file will be left in the original folder, and the batch `has_error` will be true. In this case, we don't delete files, and we rename the original folder to have extension `.err`.
94 |
95 | Now let's [start the application](start.md)!
96 |
--------------------------------------------------------------------------------
/sendit/apps/main/tasks/update.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | '''
24 |
25 | from sendit.logger import bot
26 | from celery import (
27 | shared_task,
28 | Celery
29 | )
30 |
32 | from sendit.apps.main.models import (
33 | Batch,
34 | BatchIdentifiers,
35 | Image
36 | )
37 |
38 | from sendit.apps.main.tasks.utils import (
39 | add_batch_error,
40 | change_status
41 | )
42 |
43 | from deid.dicom import (
44 | replace_identifiers as replace_ids,
45 | get_shared_identifiers
46 | )
47 |
48 | from deid.identifiers import clean_identifiers
49 | from som.api.identifiers.dicom import prepare_identifiers
50 | from sendit.apps.main.tasks.finish import upload_storage
51 |
52 | from sendit.settings import (
53 | ANONYMIZE_PIXELS,
54 | ANONYMIZE_RESTFUL,
55 | SOM_STUDY,
56 | STUDY_DEID,
57 | ENTITY_ID,
58 | ITEM_ID
59 | )
60 |
61 | from django.conf import settings
62 | import os
63 | import time
64 | from copy import deepcopy
65 |
66 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'sendit.settings')
67 | app = Celery('sendit')
68 | app.config_from_object('django.conf:settings')
69 | app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
70 |
71 |
72 | @shared_task
73 | def scrub_pixels(bid):
74 | '''scrub pixels (not currently triggered) will be run to scrub pixel data
75 | before identifiers are extracted from the header.
76 | '''
77 | from .get import get_identifiers
78 | batch = Batch.objects.get(id=bid)
79 | images = batch.image_set.all()
80 | batch.change_images_status('PROCESSING')
81 |
82 | # from deid.dicom import scrub_pixels
83 |
84 | for dcm in images:
85 |
86 | dcm_file = dcm.image.path
87 | dicom_uid = os.path.basename(dcm_file)
88 | dicom = dcm.load_dicom()
89 |
90 | if dicom.get("BurnedInAnnotation") is not None:
91 |
92 | # We shouldn't be in this function if False, but we will check again anyway
93 | if ANONYMIZE_PIXELS is True:
94 | print("Anonymization will be done here.")
95 | else:
96 | message = "%s has pixel identifiers, anonymize pixels is off, but added to batch. Removing!" %dcm_file
97 |                 dcm.delete() # if django-cleanup not in apps, will not delete image file
98 | batch = add_batch_error(message,batch)
99 |
100 | # At the end, move on to processing headers
101 | return get_identifiers(bid=batch.id)
102 |
103 |
104 | @shared_task
105 | def replace_identifiers(bid, run_upload_storage=False):
106 | '''replace identifiers is called from get_identifiers, given that the user
107 | has asked to anonymize_restful. This function will do the replacement,
108 | and then trigger the function to send to storage
109 | '''
110 |
111 | batch = Batch.objects.get(id=bid)
112 | batch.qa['ProcessStartTime'] = time.time()
113 | batch_ids = BatchIdentifiers.objects.get(batch=batch)
114 |
115 | # 1) use response from API to generate new fields
116 | working = deepcopy(batch_ids.ids)
117 | prepared = prepare_identifiers(response=batch_ids.response,
118 | ids=working)
119 | updated = deepcopy(prepared)
120 |     # 2) use response from API to anonymize all fields in batch.ids
121 | # clean_identifiers(ids, deid=None, image_type=None, default=None)
122 | # deid as None will use default "deid.dicom" provided in application
123 | # specifying a custom file/tag will use this filter first (in addition)
124 | deid = STUDY_DEID
125 | cleaned = clean_identifiers(ids=updated,
126 | default="KEEP",
127 | deid=deid)
128 | # Save progress
129 | batch_ids.cleaned = cleaned
130 | batch_ids.updated = updated
131 | batch_ids.save()
132 |
133 | # Get updated files
134 | dicom_files = batch.get_image_paths()
135 | output_folder = batch.get_path()
136 | updated_files = replace_ids(dicom_files=dicom_files,
137 | deid=deid,
138 | ids=updated, # ids[item] lookup
139 | overwrite=True, # overwrites copied files
140 | output_folder=output_folder,
141 | strip_sequences=True,
142 | remove_private=True) # force = True
143 | # save = True,
144 | # Get shared information
145 | aggregate = ["BodyPartExamined", "Modality", "StudyDescription"]
146 | shared_ids = get_shared_identifiers(dicom_files=updated_files,
147 | aggregate=aggregate)
148 | batch_ids.shared = shared_ids
149 | batch_ids.save()
150 |
151 | # Rename
152 | for dcm in batch.image_set.all():
153 | item_id = os.path.basename(dcm.image.path)
154 | try:
155 | dicom = dcm.load_dicom()
156 | # S6M0__
157 | # Rename the dicom based on suid
158 | if item_id in updated:
159 | item_suid = updated[item_id]['item_id']
160 | dcm = dcm.rename(item_suid) # added to [prefix][dcm.name]
161 | dcm.save()
162 | # If we don't have the id, don't risk uploading
163 | else:
164 |                 message = "%s for Image Id %s not found in lookup: skipping." %(item_id, dcm.id)
165 | batch = add_batch_error(message,batch)
166 | dcm.delete()
167 |         except Exception:
168 |             message = "%s for Image Id %s file read error: skipping." %(item_id, dcm.id)
169 | batch = add_batch_error(message,batch)
170 | dcm.delete()
171 |
172 | batch.qa['ProcessFinishTime'] = time.time()
173 |
174 | # We don't get here if the call above failed
175 | change_status(batch,"DONEPROCESSING")
176 | batch.save()
177 |
178 | if run_upload_storage is True:
179 | return upload_storage(batch_ids=[bid])
180 | else:
181 | updated_files = batch.get_image_paths()
182 | return updated_files
183 |
--------------------------------------------------------------------------------
/sendit/apps/main/tasks/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | '''
24 |
25 | from django.core.files import File
26 | from sendit.logger import bot
27 | from sendit.apps.main.models import (
28 | Batch,
29 | BatchIdentifiers,
30 | Image
31 | )
32 |
33 | from sendit.settings import (
34 | GOOGLE_STORAGE_COLLECTION,
35 | ENTITY_ID,
36 | ITEM_ID
37 | )
38 |
39 | from django.conf import settings
40 | import uuid
41 | import tarfile
42 | import os
43 |
44 |
45 | def chunks(l, n):
46 | '''Yield successive n-sized chunks from l.'''
47 | for i in range(0, len(l), n):
48 | yield l[i:i + n]
49 |
50 |
51 | ### FILES ##############################################################
52 |
53 | def save_image_dicom(dicom,dicom_file,basename=None):
54 | '''save image dicom will save a dicom file to django's media
55 | storage, for this application defined under /images.
56 | :param dicom: the main.Image instance
57 | :param dicom_file: the dicom file (usually in /data) to save
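    :param basename: optional basename for the saved file (defaults to the dicom_file basename)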
58 | '''
59 | if basename is None:
60 | basename = os.path.basename(dicom_file)
61 | fullpath = "%s/%s" %(settings.MEDIA_ROOT,
62 | basename)
63 |
64 | folder = os.path.dirname(fullpath)
65 | if not os.path.exists(folder):
66 | os.mkdir(folder)
67 |
68 | with open(dicom_file,'rb') as filey:
69 | django_file = File(filey)
70 | dicom.image.save(basename,
71 | django_file,
72 | save=True)
73 | dicom.save()
74 | return dicom
75 |
76 |
77 | def generate_compressed_file(files, filename=None, mode="w:gz", archive_basename=None):
78 | ''' generate a tar.gz file (default) including a set of files '''
79 | if filename is None:
80 | filename = "%s.tar.gz" %str(uuid.uuid4())
81 | bot.debug("Compressing %s files into %s" %(len(files),filename))
82 | tar = tarfile.open(filename, mode)
83 | if archive_basename is None:
84 | archive_basename = os.path.basename(filename).split('.')[0]
85 | images_added = 0
86 | for name in files:
87 | try:
88 | # Make the archive flat with the images
89 | basename = "%s/%s" %(archive_basename,
90 | os.path.basename(name))
91 | tar.add(name, arcname=basename)
92 |             images_added += 1
93 |         except FileNotFoundError:
94 |             pass  # skip files that no longer exist on disk
95 |
96 | tar.close()
97 | if images_added == 0:
98 | filename = None
99 | return filename
100 |
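# Illustrative usage (paths hypothetical): bundle a set of dicom files into
# a flat archive for upload; returns None if no file could be added.
#
#   archive = generate_compressed_file(["/tmp/a.dcm", "/tmp/b.dcm"],
#                                      filename="upload.tar.gz")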
101 |
102 | ## MODELS ##############################################################
103 |
104 | def add_batch_message(message,batch,func,quiet=False):
105 |     '''add a batch error or warning to the log,
106 |     and flag the batch as having an error.
107 | '''
108 | if quiet is False:
109 | func(message)
110 | batch.has_error = True
111 | if "errors" not in batch.logs:
112 | batch.logs['errors'] = []
113 | # Only add the unique error once
114 | if message not in batch.logs['errors']:
115 | batch.logs['errors'].append(message)
116 | batch.save()
117 | return batch
118 |
119 | def add_batch_warning(message,batch,quiet=False):
120 | return add_batch_message(message=message,
121 | batch=batch,
122 | func=bot.warning,
123 | quiet=quiet)
124 |
125 | def add_batch_error(message,batch,quiet=False):
126 | return add_batch_message(message=message,
127 | batch=batch,
128 | func=bot.error,
129 | quiet=quiet)
130 |
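# Illustrative usage (messages hypothetical); note that both helpers set
# batch.has_error and log under batch.logs['errors'].
#
#   batch = add_batch_warning("no images found in folder", batch)
#   batch = add_batch_error("header could not be read", batch)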
131 |
132 | def change_status(images,status):
133 |     '''change status will update an instance's status
134 |     to the status choice provided. This works for a single
135 |     Batch or Image instance, or a list of them.
136 | '''
137 | updated = []
138 | if not isinstance(images,list):
139 | images = [images]
140 | for image in images:
141 |         image.status = status
142 | image.save()
143 | updated.append(image)
144 | if len(updated) == 1:
145 | updated = updated[0]
146 | return updated
147 |
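# Illustrative usage (status choices depend on the model definitions):
#
#   batch = change_status(batch, "DONEPROCESSING")   # single instance
#   images = change_status(list(batch.image_set.all()), "DONEPROCESSING")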
148 |
149 | # METADATA ##############################################################
150 |
151 | def prepare_entity_metadata(cleaned_ids,image_count=None):
152 |     '''prepare entity-level metadata for Google Storage
153 | '''
154 | metadata = dict()
155 | for secret_id, item in cleaned_ids.items():
156 | eid = item[ENTITY_ID]
157 | if eid not in metadata:
158 | metadata[eid] = dict()
159 | if "PatientAge" in item:
160 | metadata[eid]["PatientAge"] = item['PatientAge']
161 | if "PatientSex" in item:
162 | metadata[eid]["PatientSex"] = item['PatientSex']
163 |     for eid in metadata:
164 | if image_count is not None:
165 | metadata[eid]["IMAGE_COUNT"] = image_count
166 | metadata[eid]["UPLOAD_AGENT"] = "STARR:SENDITClient"
167 | metadata[eid]["id"] = eid
168 | return metadata
169 |
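# Illustrative input/output shape (ids and field values hypothetical):
#
#   cleaned = {"secret-1": {ENTITY_ID: "E1", "PatientAge": "052Y", "PatientSex": "F"}}
#   prepare_entity_metadata(cleaned, image_count=10)
#   # --> {"E1": {"PatientAge": "052Y", "PatientSex": "F", "IMAGE_COUNT": 10,
#   #             "UPLOAD_AGENT": "STARR:SENDITClient", "id": "E1"}}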
170 |
171 | def prepare_items_metadata(batch):
172 |     '''prepare item-level metadata for Google Storage
173 | '''
174 | metadata = dict()
175 | cleaned = batch.batchidentifiers_set.last().cleaned
176 | for image in batch.image_set.all():
177 | secret_id = image.uid
178 | if secret_id in cleaned:
179 | metadata[image.image.path] = cleaned[secret_id]
180 | return metadata
181 |
182 |
183 | def extract_study_ids(cleaned,uid):
184 |     '''cleaned should be a dictionary indexed by original item filenames,
185 |     where each value dict holds the entity uid (under ENTITY_ID) and the
186 |     study id (under ITEM_ID). We use the uid of the entity as a lookup
187 |     to link an item (and its study) to the entity.'''
188 | studies = []
189 | for key,vals in cleaned.items():
190 |         if vals[ENTITY_ID] == uid and vals[ITEM_ID] not in studies:
191 | studies.append(vals[ITEM_ID])
192 | return studies
193 |
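# Worked sketch (filenames and ids hypothetical):
#
#   cleaned = {"img1.dcm": {ENTITY_ID: "E1", ITEM_ID: "S1"},
#              "img2.dcm": {ENTITY_ID: "E1", ITEM_ID: "S1"},
#              "img3.dcm": {ENTITY_ID: "E2", ITEM_ID: "S2"}}
#   extract_study_ids(cleaned, "E1")  # --> ["S1"]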
194 |
195 | def get_entity_images(images,study_ids):
196 |     '''Retrieve a list of entity images by finding
197 |     each study id in the image path'''
198 | entity_images = []
199 | for study_id in study_ids:
200 | subset = [x for x in images if study_id in x]
201 | entity_images = entity_images + subset
202 | return entity_images
203 |
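# Illustrative usage (paths hypothetical): keep only images whose path
# contains one of the entity's study ids.
#
#   get_entity_images(["/images/S1/a.dcm", "/images/S2/b.dcm"], ["S1"])
#   # --> ["/images/S1/a.dcm"]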
--------------------------------------------------------------------------------
/sendit/apps/main/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | Copyright (c) 2017 Vanessa Sochat
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | '''
24 |
25 | from django.core.files import File
26 | from django.http.response import Http404
27 | from sendit.settings import (
28 | DATA_BASE,
29 | DATA_SUBFOLDER,
30 | DATA_INPUT_FOLDERS
31 | )
32 | from sendit.apps.main.models import (
33 | Image,
34 | Batch
35 | )
36 |
37 | import time
38 | from sendit.logger import bot
39 | import sys
40 | import re
41 | import os
42 |
43 |
44 | #### GETS #############################################################
45 |
46 | def get_batch(sid):
47 |     '''get a single batch, or raise a 404'''
48 | keyargs = {'id':sid}
49 | try:
50 | batch = Batch.objects.get(**keyargs)
51 | except Batch.DoesNotExist:
52 | raise Http404
53 | else:
54 | return batch
55 |
56 |
57 | def get_image(sid):
58 |     '''get a single image, or raise a 404'''
59 | keyargs = {'id':sid}
60 | try:
61 | image = Image.objects.get(**keyargs)
62 | except Image.DoesNotExist:
63 | raise Http404
64 | else:
65 | return image
66 |
67 |
68 | def get_database():
69 | ''' get the base directory for parsing images,
70 | if DATA_SUBFOLDER in settings is None, returns /data
71 |     if set, returns /data/<DATA_SUBFOLDER>
72 | '''
73 |     # DATA_BASE and DATA_SUBFOLDER are imported at module level
74 | base = DATA_BASE
75 | if DATA_SUBFOLDER is not None:
76 | base = "%s/%s" %(base, DATA_SUBFOLDER.strip('/'))
77 | return base
78 |
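# Illustrative results, assuming DATA_BASE is "/data" as the docstring implies:
#
#   DATA_SUBFOLDER = None      -->  "/data"
#   DATA_SUBFOLDER = "dicom/"  -->  "/data/dicom"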
79 |
80 | def ls_fullpath(dirname,ext=None):
81 | '''get full path of all files in a directory'''
82 | if ext is not None:
83 | return [os.path.join(dirname, f) for f in os.listdir(dirname) if f.endswith(ext)]
84 | return [os.path.join(dirname, f) for f in os.listdir(dirname)]
85 |
86 |
87 |
88 |
89 |
90 | #### WORKER ##########################################################
91 |
92 | def update_cached(subfolder=None):
93 | '''
94 |     update the queue (Batch objects with status QUEUE), intended to be
95 |     run when there are new folders to find and queue.
96 |     First preference goes to a folder supplied at runtime, then to
97 |     application defaults. We return early if no input folders are found.
98 | '''
99 | CHECK_FOLDERS = None
100 |
101 | # First preference goes to variable given at runtime
102 | if subfolder is not None:
103 | CHECK_FOLDERS = subfolder
104 |
105 |     # Second preference goes to DATA_INPUT_FOLDERS
106 |     if CHECK_FOLDERS is None and DATA_INPUT_FOLDERS not in ['',None]:
107 |         CHECK_FOLDERS = DATA_INPUT_FOLDERS
108 |
109 | # Final preference goes to data subfolder. We don't parse root.
110 | # The base of data has directories that need to be organized
111 | if CHECK_FOLDERS is None:
112 | if DATA_SUBFOLDER is not None:
113 | CHECK_FOLDERS = "%s/%s" %(DATA_BASE,DATA_SUBFOLDER)
114 | else:
115 | bot.error("Specify DATA_INPUT_FOLDERS in settings for cached jobs.")
116 | return
117 |
118 | if not isinstance(CHECK_FOLDERS,list):
119 | CHECK_FOLDERS = [CHECK_FOLDERS]
120 |
121 | count = 0
122 | current = [x.uid for x in Batch.objects.all()]
123 | for base in CHECK_FOLDERS:
124 | print('Checking base %s' %base)
125 | if os.path.exists(base) and os.path.isdir(base):
126 |             # A base that doesn't end in a 10-digit date is used directly
127 |             if not re.search('[0-9]{10}$', base):
128 | contenders = [base]
129 | else:
130 | contenders = get_contenders(base=base,current=current)
131 | for contender in contenders:
132 |                 dicom_dir = contender if contender == base else "%s/%s" %(base,contender)
133 | dcm_folder = os.path.basename(dicom_dir)
134 | batch,created = Batch.objects.get_or_create(uid=dcm_folder)
135 | if created is True:
136 | batch.status = "QUEUE"
137 | batch.logs['DICOM_DIR'] = dicom_dir
138 |                 count += 1
139 | batch.save()
140 |
141 | print("Added %s contenders for processing queue." %count)
142 |
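# Illustrative invocations (folder path hypothetical):
#
#   update_cached("/data/1_6")   # queue a specific folder
#   update_cached()              # fall back to DATA_INPUT_FOLDERS or DATA_SUBFOLDER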
143 |
144 | def start_queue(subfolder=None, max_count=None):
145 | '''
146 |     start queue moves new Batches (jobs) from the QUEUE to be run
147 |     as celery tasks. The status is changed from QUEUE to NEW when this is done.
148 | If the QUEUE is empty, we parse the filesystem (and queue new jobs) again.
149 | This job submission is done all at once to ensure that we don't have race
150 | conditions of multiple workers trying to grab a job at the same time.
151 | '''
152 | from sendit.apps.main.tasks import import_dicomdir
153 |
154 | contenders = Batch.objects.filter(status="QUEUE")
155 | if len(contenders) == 0:
156 | update_cached(subfolder)
157 | contenders = Batch.objects.filter(status="QUEUE")
158 |
159 | started = 0
160 | for batch in contenders:
161 |         # submit queued batches that have a recorded dicom directory
162 | dicom_dir = batch.logs.get('DICOM_DIR')
163 | if dicom_dir is not None:
164 | import_dicomdir.apply_async(kwargs={"dicom_dir":dicom_dir})
165 |             started += 1
166 | if max_count is not None:
167 | if started >= max_count:
168 | break
169 |
170 | print("Added %s tasks to the active queue." %started)
171 |
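# Illustrative invocation:
#
#   start_queue(max_count=100)   # submit at most 100 queued batches to celery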
172 |
173 | def upload_finished(batches=False, chunk_size=1000):
174 | '''upload finished will upload datasets with status DONEPROCESSING
175 | to google storage. We do this with one worker to reduce the number
176 | of concurrent API calls. In the future, this will be better optimized.
177 | '''
178 | from sendit.apps.main.tasks import upload_storage
179 | from sendit.apps.main.tasks.utils import chunks
180 |
181 | if batches is False:
182 | upload_storage.apply_async()
183 | else:
184 | batch_ids = [b.id for b in Batch.objects.filter(status="DONEPROCESSING")]
185 | for subset in chunks(batch_ids, chunk_size):
186 | upload_storage.apply_async(kwargs={"batch_ids": subset})
187 |
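# Illustrative invocations:
#
#   upload_finished()                              # one task for all DONEPROCESSING batches
#   upload_finished(batches=True, chunk_size=500)  # one task per 500 batch ids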
188 |
189 | def get_contenders(base,current=None, filters=None):
190 | ''' get contenders will return a full set of contender folders from
191 |     a base directory, taking into account a list of currently known
192 |     folders (current) and filtering out folder names that end with
193 |     any suffix in filters
194 | '''
195 | if filters is None:
196 | filters = ['tmp','part']
197 |     contenders = [x for x in os.listdir(base) if not os.path.isfile(os.path.join(base, x))]
198 | for ending in filters:
199 | contenders = [x for x in contenders if not x.endswith(ending)]
200 |
201 | if current is not None:
202 | contenders = [x for x in contenders if x not in current]
203 | return contenders
204 |
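# Illustrative usage (base path hypothetical): list unseen, non-temporary
# subfolders of an input directory.
#
#   known = [b.uid for b in Batch.objects.all()]
#   get_contenders("/data/1_6", current=known)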
--------------------------------------------------------------------------------