├── Dockerfile ├── License.md ├── README.md ├── build_and_push.sh ├── lambda_handler.py ├── local_test ├── input.json ├── ping.sh ├── predict.sh ├── serve_local.sh └── test_dir │ ├── input │ ├── config │ │ └── hyperparameters.json │ └── data │ │ └── training │ │ └── iris.csv │ ├── model │ ├── classifier.pkl │ ├── sentiment_analysis_artifacts.tar.gz │ └── tfidf_vectorizer.pkl │ └── output │ └── success └── pothole_base ├── nginx.conf ├── pothole ├── __init__.py ├── mrcnn │ ├── __init__.py │ ├── config.py │ ├── model.py │ ├── parallel_model.py │ ├── utils.py │ └── visualize.py └── pothole.py ├── predictor.py ├── serve ├── settings.ini ├── train ├── video_markup.py └── wsgi.py /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training and inference in SageMaker 2 | # This is a Python 2 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 4 | 5 | FROM ubuntu:16.04 6 | 7 | #FOR GPU: 8 | #option 1/ nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04 9 | #option 2/ (smile case)# FROM nvidia/cuda:9.0-base-ubuntu16.04 10 | #option 3/ for docker gpu on local cpu: github.com/NVIDIA/nvidia-docker 11 | 12 | #**always run apt-get update with next command to fix caching issues 13 | RUN apt-get update && apt-get install -y unzip 14 | #libgcc option 15 | RUN apt-get install -y --no-install-recommends \ 16 | wget \ 17 | python3.5 \ 18 | nginx \ 19 | ca-certificates \ 20 | libgcc-5-dev \ 21 | && rm -rf /var/lib/apt/lists/* 22 | 23 | 24 | #For cv2: 25 | RUN apt-get update && apt-get install -y libxrender1 libsm6 libglib2.0 libxext6 26 | #For matlabplot: 27 | RUN apt-get install -y python3-tk 28 | RUN apt-get install -y vim 29 | 30 | 31 | #Python 32 | RUN wget https://bootstrap.pypa.io/3.3/get-pip.py && python3.5 get-pip.py 33 | RUN pip3 install --upgrade pip 34 | 35 | 36 | #https://medium.com/smileinnovation/sagemaker-bring-your-own-algorithms-719dd539607d 37 | #cuda-command-line-tools-9<96>0 38 | #cuda-cublas-dev-9<96>0 39 | #cuda-cudart-dev-9<96>0 40 | #cuda-cufft-dev-9<96>0 41 | #cuda-curand-dev-9<96>0 42 | #cuda-cusolver-dev-9<96>0 43 | #cuda-cusparse-dev-9<96>0 44 | #libcudnn7=7.0.5.15<96>1+cuda9.0 45 | #libcudnn7-dev=7.0.5.15<96>1+cuda9.0 46 | 47 | 48 | #?Both tensorflows? 49 | #So, what I did is to detect if the <93>import<94> of tensorflow raises an exception. 50 | #If it does, I uninstall <93>tensorflow-gpu<94> and install 51 | #<93>tensorflow<94> at the Docker container startup. 52 | #It results in a few more seconds to start up the instance, but this 53 | #<93>hack<94> is working perfectly fine in this scenario. I<92>m pretty sure 54 | #there are other ways to do it, feel free to comment if you have one in your mind. 55 | 56 | #std pothole requirements.txt 57 | RUN pip3 install tensorflow>=1.5.1 58 | #GPU - requires blas libs etc# RUN pip3 install tensorflow-gpu>=1.5.1 59 | RUN pip3 install keras>=2.0.8 60 | RUN pip3 install numpy 61 | RUN pip3 install scipy 62 | RUN pip3 install Pillow 63 | RUN pip3 install cython 64 | RUN pip3 install matplotlib 65 | RUN pip3 install scikit-image 66 | RUN pip3 install opencv-python 67 | RUN pip3 install h5py 68 | RUN pip3 install imgaug 69 | RUN pip3 install IPython[all] 70 | RUN pip3 install moviepy 71 | RUN pip3 install pytesseract 72 | 73 | RUN pip3 install flask gevent gunicorn 74 | RUN pip3 install boto3 75 | RUN pip3 install configparser 76 | RUN pip3 install imageio 77 | 78 | 79 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 80 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 81 | # keeps Python from writing the .pyc files which are unnecessary in this case. We also update 82 | # PATH so that the train and serve programs are found when the container is invoked. 83 | 84 | ENV PYTHONUNBUFFERED=TRUE 85 | ENV PYTHONDONTWRITEBYTECODE=TRUE 86 | ENV PATH="/opt/program:${PATH}" 87 | 88 | #OTHER ENV options: 89 | #zero# ENV MODELS_PATH=/opt/ml/model 90 | 91 | # Set up the program in the image 92 | COPY pothole_base /opt/program 93 | 94 | RUN chmod +x /opt/program/train /opt/program/serve 95 | 96 | RUN wget https://tests-road-damage.s3.amazonaws.com/sagemaker/mask_rcnn_pothole_0005.h5 97 | RUN mv mask_rcnn_pothole_0005.h5 /opt/program/pothole 98 | 99 | # install ffmpeg from imageio. 100 | RUN python3.5 -c "import imageio; imageio.plugins.ffmpeg.download()" 101 | 102 | #add soft link so that ffmpeg can executed (like usual) from command line 103 | RUN ln -s /root/.imageio/ffmpeg/ffmpeg.linux64 /usr/bin/ffmpeg 104 | 105 | WORKDIR /opt/program 106 | 107 | #set default python version to 3.5 108 | RUN touch ~/.bash_aliases \ 109 | && echo alias python=\'python3.5\' > ~/.bash_aliases 110 | RUN alias python=python3.5 -------------------------------------------------------------------------------- /License.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 yaswanth.chakka@yahoo.com. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction only for educational purposes, including without limitation the rights 6 | to use, copy, modify, merge and to permit persons to whom the Software is 7 | furnished to do so, subject to the following conditions: 8 | 9 | Any kind of commericial usage of this Software is prohibited please contact the author of this repo for pemission at the above mentioned email address. 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Future Developements 2 | ![alt Full scale Product](https://s3.amazonaws.com/wildrydes-yash-chakka/Architecture_New.png) 3 | 4 | # Architecture 5 | ![alt Minimum Viable Product](https://s3.amazonaws.com/wildrydes-yash-chakka/Architecture_Object_Detection.png) 6 | 7 | # General Flow 8 | ![general flow](https://s3.amazonaws.com/wildrydes-yash-chakka/General_Flow.png) 9 | 10 | # Video Output 11 | https://github.com/Cyash/Pothole_Detection/assets/23339145/9f7cad38-3a7a-49a9-a078-970f8081f6ab 12 | 13 | # Pothole-deployments 14 | Deployment environment for pothole detection system 15 | 16 | # Sagemaker Deployment Notes 17 | 18 | Deploying a custom machine learning package to sagemaker. 19 | - There are quite a few moving parts. Lets go through the standard workflow. 20 | 21 | # Docker Image 22 | - The key component is to get your setup to compile correctly with docker. 23 | - Install Docker and test hello world example using `docker run hello-world` you should see "Hello from Docker!This message shows that your installation appears to be working correctly." 24 | - Run the following standard commands to validate the setup locally: 25 | - `Docker build -t trial1 .` It will build the environment and download the large models 26 | - `Docker run -p 80:8080 trial1 serve .` This will start the docker container locally. The [serve] program/command is run which starts the wsgi and predictor.py Flask handler. 27 | • Note: if you get an error “python\r” you may have a bad hard return in the serve file. Run “dos2unix serve” to repair it. 28 | - `cd local_test` and run `./predict.sh`. This will past a standard test request to the docker container. The input s3 filename will be downloaded and processed by the container. 29 | 30 | With the above validated, you’re ready to deploy to sagemaker. 31 | 32 | # Push docker to ECR 33 | - Setup your aws cli environment: “aws configure” using your key and secret. 34 | - Ensure you have access to the ECR service (where you push docker). 35 | - Login to ecr: “aws ecr get-login”..then run the response to create a login session 36 | - Run “./build_and_push.sh pothole-docker-image” 37 | o This does the same docker build as the local test above and then pushes the image to ECR. 38 | o “pothole-docker-image” can be any name of the container. 39 | - Log into ECR and note the id of the docker repository ie/ 588698724959.dkr.ecr.us-east-1.amazonaws.com/pothole_docker_image 40 | 41 | # CREATE MODEL 42 | - Log into sagemaker and “create endpoint” 43 | o Create a new model 44 | o You’ll need to configure the “endpoint configuration” AND the “endpoint” 45 | o Once the endpoint is created… it will say “Creating”. After about 10 minutes the model should be active. You can see progress by going to the Cloudwatch logs. 46 |  https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints 47 | 48 | # CREATE LAMBDA 49 | - Create a lambda function (ie/ CallPothole) 50 | - It should run when a new s3 object is place in the bucket. 51 | - You may need to give it access to access full control of sagemaker (IAMS policy). This allows it to call the sagemaker_endpoint. 52 | - Copy and paste the lambda_handler.py script into the lambda editor. 53 | - Test event. The lambda_handler.py has an s3 test event in the source code. You can copy this into a “test” event for lambda. It simulates an s3 file being placed. 54 | - Note: The lambda will timeout after 3 seconds, but sagemaker will continue to run and will complete normally. 55 | o If wanted, you can add to lambda to send an alert/email once an *_output.mp4 file appears in the bucket. Or, have a more sophisticated monitoring tool. 56 | 57 | That’s it! 58 | - You can validate it’s running by placing a video file in the s3 bucket directory. The lambda should see it, call sagemaker, which then processes the file and outputs it as: input_filename_output.mp4 59 | 60 | When ready to fully activate the system, change the lambda variable “is_live” from False to True. False only processes 0.01s of video. True processes the entire video but can potentially take ½ day. 61 | 62 | 63 | # Potential issues or exceptions 64 | - Note: “server” file controls how many server workers there are. This can typically be the number of CPUs. For now, it’s hard coded at 2. Meaning, it can process two concurrent videos at a time. Though the exact performance requirements would need to be evaluated. 65 | - The multithreaded system causes some trouble with the RCNN model. For this reason, it was necessary to secure the model session before use (see backend.get_session() in video_markup.py) 66 | - Note, the RCNN model.py file had a concurrency issue as well when creating a directory that may already exist. This was the only changed needed to the ML library. 67 | - Parts(Cognito, SES) of the Above listed architecture hasn't been implemented as there weren't super critical, but for full security we definatelt need to implement them 68 | 69 | -------------------------------------------------------------------------------- /build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #dos2unix build_and_push.sh if \r 3 | 4 | # This script shows how to build the Docker image and push it to ECR to be ready for use 5 | # by SageMaker. 6 | 7 | # The argument to this script is the image name. This will be used as the image on the local 8 | # machine and combined with the account and region to form the repository name for ECR. 9 | image=$1 10 | 11 | if [ "$image" == "" ] 12 | then 13 | echo "Usage: $0 " 14 | exit 1 15 | fi 16 | 17 | chmod +x pothole_base/train 18 | chmod +x pothole_base/serve 19 | 20 | # Get the account number associated with the current IAM credentials 21 | account=$(aws sts get-caller-identity --query Account --output text) 22 | 23 | if [ $? -ne 0 ] 24 | then 25 | exit 255 26 | fi 27 | 28 | 29 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 30 | region=$(aws configure get region) 31 | region=${region:-us-west-2} 32 | 33 | 34 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 35 | 36 | # If the repository doesn't exist in ECR, create it. 37 | 38 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 39 | 40 | if [ $? -ne 0 ] 41 | then 42 | aws ecr create-repository --repository-name "${image}" > /dev/null 43 | fi 44 | 45 | # Get the login command from ECR and execute it directly 46 | $(aws ecr get-login --region ${region} --no-include-email) 47 | 48 | # Build the docker image locally with the image name and then push it to ECR 49 | # with the full name. 50 | 51 | docker build -t ${image} . 52 | docker tag ${image} ${fullname} 53 | 54 | docker push ${fullname} 55 | -------------------------------------------------------------------------------- /lambda_handler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import io 4 | import boto3 5 | import json 6 | import csv 7 | import urllib 8 | import re 9 | 10 | 11 | #0v1# JC Sept 14, 2018 12 | 13 | 14 | # grab environment variables 15 | ENDPOINT_NAME = os.environ['ENDPOINT_NAME'] 16 | runtime= boto3.client('runtime.sagemaker') 17 | 18 | test_vector_for_lambda=""" 19 | { 20 | "Records": [ 21 | { 22 | "eventVersion": "2.0", 23 | "eventTime": "1970-01-01T00:00:00.000Z", 24 | "requestParameters": { 25 | "sourceIPAddress": "127.0.0.1" 26 | }, 27 | "s3": { 28 | "configurationId": "testConfigRule", 29 | "object": { 30 | "eTag": "0123456789abcdef0123456789abcdef", 31 | "sequencer": "0A1B2C3D4E5F678901", 32 | "key": "sagemaker/small_pot.mp4", 33 | "size": 1024 34 | }, 35 | "bucket": { 36 | "arn": "arn:aws:s3:::mybucket", 37 | "name": "tests-road-damange", 38 | "ownerIdentity": { 39 | "principalId": "EXAMPLE" 40 | } 41 | }, 42 | "s3SchemaVersion": "1.0" 43 | }, 44 | "responseElements": { 45 | "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH", 46 | "x-amz-request-id": "EXAMPLE123456789" 47 | }, 48 | "awsRegion": "us-east-1", 49 | "eventName": "ObjectCreated:Put", 50 | "userIdentity": { 51 | "principalId": "EXAMPLE" 52 | }, 53 | "eventSource": "aws:s3" 54 | } 55 | ] 56 | } 57 | """ 58 | 59 | #https://aws.amazon.com/blogs/machine-learning/call-an-amazon-sagemaker-model-endpoint-using-amazon-api-gateway-and-aws-lambda/ 60 | 61 | #ENDPOINT_NAME is an environment variable that holds the name of the SageMaker model endpoint you just deployed using the sample 62 | #endpoint name you created, if it is different. 63 | #The event that invokes the Lambda function is triggered by API Gateway. API Gateway simply passes the test data through an event. 64 | 65 | 66 | def handle_lambda(event,context,called_local=False): 67 | #**Use above event if running manual test on amazon 68 | 69 | #1/ GET S3 file creation event 70 | #############################################################333 71 | print("EVENT: "+str(event)) 72 | bucket_name = event['Records'][0]['s3']['bucket']['name'] 73 | key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']) 74 | print("Using bucket name: "+bucket_name) 75 | print("Using key: "+key) 76 | 77 | if not re.search(r'_output\.',key): 78 | #2/ Call sagemaker endpoint invocation 79 | #> note: will likley timeout by default 80 | payload={} 81 | payload['input']={} 82 | payload['input']['s3_source_filename']=key 83 | payload['input']['s3_bucket']=bucket_name 84 | payload['input']['is_live']=False 85 | 86 | print("Calling endpoint...") 87 | response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME, ContentType='application/json', Body=json.dumps(payload)) 88 | print("RESPONSE:") 89 | print(response) 90 | #result = json.loads(response['Body'].read().decode()) 91 | else: 92 | print("New file appears like output: _output -- skipping: "+str(key)) 93 | return "Standard response" 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /local_test/input.json: -------------------------------------------------------------------------------- 1 | {"input": 2 | {"s3_source_filename" : "/sagemaker/small_pot.mp4", 3 | "s3_bucket" : "tests-road-damange", 4 | "is_live": false} 5 | } -------------------------------------------------------------------------------- /local_test/ping.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | payload=$1 4 | content=${2:-application/json} 5 | 6 | curl -vX GET http://localhost:8081/ping 7 | -------------------------------------------------------------------------------- /local_test/predict.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | payload=$1 4 | content=${2:-application/json} 5 | 6 | #curl -d @${payload} -H "Content-Type: ${content}" -vX POST http://localhost:8080/invocations 7 | curl -XPOST -H "Content-Type: application/json" 'http://127.0.0.1:8081/invocations' -d '{"input": {"s3_source_filename" : "sagemaker/small_pot.mp4", "s3_bucket" : "tests-road-damage", "is_live": false} }' 8 | 9 | -------------------------------------------------------------------------------- /local_test/serve_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | image=$1 4 | 5 | docker run -v $(pwd)/test_dir:/opt/ml -p 8080:8080 --rm ${image} serve 6 | -------------------------------------------------------------------------------- /local_test/test_dir/input/config/hyperparameters.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /local_test/test_dir/input/data/training/iris.csv: -------------------------------------------------------------------------------- 1 | setosa,5.1,3.5,1.4,0.2 2 | setosa,4.9,3,1.4,0.2 3 | setosa,4.7,3.2,1.3,0.2 4 | setosa,4.6,3.1,1.5,0.2 5 | setosa,5,3.6,1.4,0.2 6 | setosa,5.4,3.9,1.7,0.4 7 | setosa,4.6,3.4,1.4,0.3 8 | setosa,5,3.4,1.5,0.2 9 | setosa,4.4,2.9,1.4,0.2 10 | setosa,4.9,3.1,1.5,0.1 11 | setosa,5.4,3.7,1.5,0.2 12 | setosa,4.8,3.4,1.6,0.2 13 | setosa,4.8,3,1.4,0.1 14 | setosa,4.3,3,1.1,0.1 15 | setosa,5.8,4,1.2,0.2 16 | setosa,5.7,4.4,1.5,0.4 17 | setosa,5.4,3.9,1.3,0.4 18 | setosa,5.1,3.5,1.4,0.3 19 | setosa,5.7,3.8,1.7,0.3 20 | setosa,5.1,3.8,1.5,0.3 21 | setosa,5.4,3.4,1.7,0.2 22 | setosa,5.1,3.7,1.5,0.4 23 | setosa,4.6,3.6,1,0.2 24 | setosa,5.1,3.3,1.7,0.5 25 | setosa,4.8,3.4,1.9,0.2 26 | setosa,5,3,1.6,0.2 27 | setosa,5,3.4,1.6,0.4 28 | setosa,5.2,3.5,1.5,0.2 29 | setosa,5.2,3.4,1.4,0.2 30 | setosa,4.7,3.2,1.6,0.2 31 | setosa,4.8,3.1,1.6,0.2 32 | setosa,5.4,3.4,1.5,0.4 33 | setosa,5.2,4.1,1.5,0.1 34 | setosa,5.5,4.2,1.4,0.2 35 | setosa,4.9,3.1,1.5,0.2 36 | setosa,5,3.2,1.2,0.2 37 | setosa,5.5,3.5,1.3,0.2 38 | setosa,4.9,3.6,1.4,0.1 39 | setosa,4.4,3,1.3,0.2 40 | setosa,5.1,3.4,1.5,0.2 41 | setosa,5,3.5,1.3,0.3 42 | setosa,4.5,2.3,1.3,0.3 43 | setosa,4.4,3.2,1.3,0.2 44 | setosa,5,3.5,1.6,0.6 45 | setosa,5.1,3.8,1.9,0.4 46 | setosa,4.8,3,1.4,0.3 47 | setosa,5.1,3.8,1.6,0.2 48 | setosa,4.6,3.2,1.4,0.2 49 | setosa,5.3,3.7,1.5,0.2 50 | setosa,5,3.3,1.4,0.2 51 | versicolor,7,3.2,4.7,1.4 52 | versicolor,6.4,3.2,4.5,1.5 53 | versicolor,6.9,3.1,4.9,1.5 54 | versicolor,5.5,2.3,4,1.3 55 | versicolor,6.5,2.8,4.6,1.5 56 | versicolor,5.7,2.8,4.5,1.3 57 | versicolor,6.3,3.3,4.7,1.6 58 | versicolor,4.9,2.4,3.3,1 59 | versicolor,6.6,2.9,4.6,1.3 60 | versicolor,5.2,2.7,3.9,1.4 61 | versicolor,5,2,3.5,1 62 | versicolor,5.9,3,4.2,1.5 63 | versicolor,6,2.2,4,1 64 | versicolor,6.1,2.9,4.7,1.4 65 | versicolor,5.6,2.9,3.6,1.3 66 | versicolor,6.7,3.1,4.4,1.4 67 | versicolor,5.6,3,4.5,1.5 68 | versicolor,5.8,2.7,4.1,1 69 | versicolor,6.2,2.2,4.5,1.5 70 | versicolor,5.6,2.5,3.9,1.1 71 | versicolor,5.9,3.2,4.8,1.8 72 | versicolor,6.1,2.8,4,1.3 73 | versicolor,6.3,2.5,4.9,1.5 74 | versicolor,6.1,2.8,4.7,1.2 75 | versicolor,6.4,2.9,4.3,1.3 76 | versicolor,6.6,3,4.4,1.4 77 | versicolor,6.8,2.8,4.8,1.4 78 | versicolor,6.7,3,5,1.7 79 | versicolor,6,2.9,4.5,1.5 80 | versicolor,5.7,2.6,3.5,1 81 | versicolor,5.5,2.4,3.8,1.1 82 | versicolor,5.5,2.4,3.7,1 83 | versicolor,5.8,2.7,3.9,1.2 84 | versicolor,6,2.7,5.1,1.6 85 | versicolor,5.4,3,4.5,1.5 86 | versicolor,6,3.4,4.5,1.6 87 | versicolor,6.7,3.1,4.7,1.5 88 | versicolor,6.3,2.3,4.4,1.3 89 | versicolor,5.6,3,4.1,1.3 90 | versicolor,5.5,2.5,4,1.3 91 | versicolor,5.5,2.6,4.4,1.2 92 | versicolor,6.1,3,4.6,1.4 93 | versicolor,5.8,2.6,4,1.2 94 | versicolor,5,2.3,3.3,1 95 | versicolor,5.6,2.7,4.2,1.3 96 | versicolor,5.7,3,4.2,1.2 97 | versicolor,5.7,2.9,4.2,1.3 98 | versicolor,6.2,2.9,4.3,1.3 99 | versicolor,5.1,2.5,3,1.1 100 | versicolor,5.7,2.8,4.1,1.3 101 | virginica,6.3,3.3,6,2.5 102 | virginica,5.8,2.7,5.1,1.9 103 | virginica,7.1,3,5.9,2.1 104 | virginica,6.3,2.9,5.6,1.8 105 | virginica,6.5,3,5.8,2.2 106 | virginica,7.6,3,6.6,2.1 107 | virginica,4.9,2.5,4.5,1.7 108 | virginica,7.3,2.9,6.3,1.8 109 | virginica,6.7,2.5,5.8,1.8 110 | virginica,7.2,3.6,6.1,2.5 111 | virginica,6.5,3.2,5.1,2 112 | virginica,6.4,2.7,5.3,1.9 113 | virginica,6.8,3,5.5,2.1 114 | virginica,5.7,2.5,5,2 115 | virginica,5.8,2.8,5.1,2.4 116 | virginica,6.4,3.2,5.3,2.3 117 | virginica,6.5,3,5.5,1.8 118 | virginica,7.7,3.8,6.7,2.2 119 | virginica,7.7,2.6,6.9,2.3 120 | virginica,6,2.2,5,1.5 121 | virginica,6.9,3.2,5.7,2.3 122 | virginica,5.6,2.8,4.9,2 123 | virginica,7.7,2.8,6.7,2 124 | virginica,6.3,2.7,4.9,1.8 125 | virginica,6.7,3.3,5.7,2.1 126 | virginica,7.2,3.2,6,1.8 127 | virginica,6.2,2.8,4.8,1.8 128 | virginica,6.1,3,4.9,1.8 129 | virginica,6.4,2.8,5.6,2.1 130 | virginica,7.2,3,5.8,1.6 131 | virginica,7.4,2.8,6.1,1.9 132 | virginica,7.9,3.8,6.4,2 133 | virginica,6.4,2.8,5.6,2.2 134 | virginica,6.3,2.8,5.1,1.5 135 | virginica,6.1,2.6,5.6,1.4 136 | virginica,7.7,3,6.1,2.3 137 | virginica,6.3,3.4,5.6,2.4 138 | virginica,6.4,3.1,5.5,1.8 139 | virginica,6,3,4.8,1.8 140 | virginica,6.9,3.1,5.4,2.1 141 | virginica,6.7,3.1,5.6,2.4 142 | virginica,6.9,3.1,5.1,2.3 143 | virginica,5.8,2.7,5.1,1.9 144 | virginica,6.8,3.2,5.9,2.3 145 | virginica,6.7,3.3,5.7,2.5 146 | virginica,6.7,3,5.2,2.3 147 | virginica,6.3,2.5,5,1.9 148 | virginica,6.5,3,5.2,2 149 | virginica,6.2,3.4,5.4,2.3 150 | virginica,5.9,3,5.1,1.8 151 | -------------------------------------------------------------------------------- /local_test/test_dir/model/classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cyash/Pothole_Detection/5bdaa68729653f0f4d5cd330edf5cf42d10d3aed/local_test/test_dir/model/classifier.pkl -------------------------------------------------------------------------------- /local_test/test_dir/model/sentiment_analysis_artifacts.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cyash/Pothole_Detection/5bdaa68729653f0f4d5cd330edf5cf42d10d3aed/local_test/test_dir/model/sentiment_analysis_artifacts.tar.gz -------------------------------------------------------------------------------- /local_test/test_dir/model/tfidf_vectorizer.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cyash/Pothole_Detection/5bdaa68729653f0f4d5cd330edf5cf42d10d3aed/local_test/test_dir/model/tfidf_vectorizer.pkl -------------------------------------------------------------------------------- /local_test/test_dir/output/success: -------------------------------------------------------------------------------- 1 | Done -------------------------------------------------------------------------------- /pothole_base/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pothole_base/pothole/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pothole_base/pothole/mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pothole_base/pothole/mrcnn/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import numpy as np 11 | 12 | 13 | # Base Configuration Class 14 | # Don't use this class directly. Instead, sub-class it and override 15 | # the configurations you need to change. 16 | 17 | class Config(object): 18 | """Base configuration class. For custom configurations, create a 19 | sub-class that inherits from this one and override properties 20 | that need to be changed. 21 | """ 22 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 23 | # Useful if your code needs to do things differently depending on which 24 | # experiment is running. 25 | NAME = None # Override in sub-classes 26 | 27 | # NUMBER OF GPUs to use. For CPU training, use 1 28 | GPU_COUNT = 1 29 | 30 | # Number of images to train with on each GPU. A 12GB GPU can typically 31 | # handle 2 images of 1024x1024px. 32 | # Adjust based on your GPU memory and image sizes. Use the highest 33 | # number that your GPU can handle for best performance. 34 | IMAGES_PER_GPU = 2 35 | 36 | # Number of training steps per epoch 37 | # This doesn't need to match the size of the training set. Tensorboard 38 | # updates are saved at the end of each epoch, so setting this to a 39 | # smaller number means getting more frequent TensorBoard updates. 40 | # Validation stats are also calculated at each epoch end and they 41 | # might take a while, so don't set this too small to avoid spending 42 | # a lot of time on validation stats. 43 | STEPS_PER_EPOCH = 1000 44 | 45 | # Number of validation steps to run at the end of every training epoch. 46 | # A bigger number improves accuracy of validation stats, but slows 47 | # down the training. 48 | VALIDATION_STEPS = 50 49 | 50 | # Backbone network architecture 51 | # Supported values are: resnet50, resnet101. 52 | # You can also provide a callable that should have the signature 53 | # of model.resnet_graph. If you do so, you need to supply a callable 54 | # to COMPUTE_BACKBONE_SHAPE as well 55 | BACKBONE = "resnet101" 56 | 57 | # Only useful if you supply a callable to BACKBONE. Should compute 58 | # the shape of each layer of the FPN Pyramid. 59 | # See model.compute_backbone_shapes 60 | COMPUTE_BACKBONE_SHAPE = None 61 | 62 | # The strides of each layer of the FPN Pyramid. These values 63 | # are based on a Resnet101 backbone. 64 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 65 | 66 | # Size of the fully-connected layers in the classification graph 67 | FPN_CLASSIF_FC_LAYERS_SIZE = 1024 68 | 69 | # Size of the top-down layers used to build the feature pyramid 70 | TOP_DOWN_PYRAMID_SIZE = 256 71 | 72 | # Number of classification classes (including background) 73 | NUM_CLASSES = 1 # Override in sub-classes 74 | 75 | # Length of square anchor side in pixels 76 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 77 | 78 | # Ratios of anchors at each cell (width/height) 79 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 80 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 81 | 82 | # Anchor stride 83 | # If 1 then anchors are created for each cell in the backbone feature map. 84 | # If 2, then anchors are created for every other cell, and so on. 85 | RPN_ANCHOR_STRIDE = 1 86 | 87 | # Non-max suppression threshold to filter RPN proposals. 88 | # You can increase this during training to generate more propsals. 89 | RPN_NMS_THRESHOLD = 0.7 90 | 91 | # How many anchors per image to use for RPN training 92 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 93 | 94 | # ROIs kept after non-maximum suppression (training and inference) 95 | POST_NMS_ROIS_TRAINING = 2000 96 | POST_NMS_ROIS_INFERENCE = 1000 97 | 98 | # If enabled, resizes instance masks to a smaller size to reduce 99 | # memory load. Recommended when using high-resolution images. 100 | USE_MINI_MASK = True 101 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 102 | 103 | # Input image resizing 104 | # Generally, use the "square" resizing mode for training and predicting 105 | # and it should work well in most cases. In this mode, images are scaled 106 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 107 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is 108 | # padded with zeros to make it a square so multiple images can be put 109 | # in one batch. 110 | # Available resizing modes: 111 | # none: No resizing or padding. Return the image unchanged. 112 | # square: Resize and pad with zeros to get a square image 113 | # of size [max_dim, max_dim]. 114 | # pad64: Pads width and height with zeros to make them multiples of 64. 115 | # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales 116 | # up before padding. IMAGE_MAX_DIM is ignored in this mode. 117 | # The multiple of 64 is needed to ensure smooth scaling of feature 118 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 119 | # crop: Picks random crops from the image. First, scales the image based 120 | # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of 121 | # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. 122 | # IMAGE_MAX_DIM is not used in this mode. 123 | IMAGE_RESIZE_MODE = "square" 124 | IMAGE_MIN_DIM = 800 125 | IMAGE_MAX_DIM = 1024 126 | # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further 127 | # up scaling. For example, if set to 2 then images are scaled up to double 128 | # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. 129 | # Howver, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. 130 | IMAGE_MIN_SCALE = 0 131 | 132 | # Image mean (RGB) 133 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 134 | 135 | # Number of ROIs per image to feed to classifier/mask heads 136 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 137 | # enough positive proposals to fill this and keep a positive:negative 138 | # ratio of 1:3. You can increase the number of proposals by adjusting 139 | # the RPN NMS threshold. 140 | TRAIN_ROIS_PER_IMAGE = 200 141 | 142 | # Percent of positive ROIs used to train classifier/mask heads 143 | ROI_POSITIVE_RATIO = 0.33 144 | 145 | # Pooled ROIs 146 | POOL_SIZE = 7 147 | MASK_POOL_SIZE = 14 148 | 149 | # Shape of output mask 150 | # To change this you also need to change the neural network mask branch 151 | MASK_SHAPE = [28, 28] 152 | 153 | # Maximum number of ground truth instances to use in one image 154 | MAX_GT_INSTANCES = 100 155 | 156 | # Bounding box refinement standard deviation for RPN and final detections. 157 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 158 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 159 | 160 | # Max number of final detections 161 | DETECTION_MAX_INSTANCES = 100 162 | 163 | # Minimum probability value to accept a detected instance 164 | # ROIs below this threshold are skipped 165 | DETECTION_MIN_CONFIDENCE = 0.7 166 | 167 | # Non-maximum suppression threshold for detection 168 | DETECTION_NMS_THRESHOLD = 0.3 169 | 170 | # Learning rate and momentum 171 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 172 | # weights to explode. Likely due to differences in optimizer 173 | # implementation. 174 | LEARNING_RATE = 0.001 175 | LEARNING_MOMENTUM = 0.9 176 | 177 | # Weight decay regularization 178 | WEIGHT_DECAY = 0.0001 179 | 180 | # Loss weights for more precise optimization. 181 | # Can be used for R-CNN training setup. 182 | LOSS_WEIGHTS = { 183 | "rpn_class_loss": 1., 184 | "rpn_bbox_loss": 1., 185 | "mrcnn_class_loss": 1., 186 | "mrcnn_bbox_loss": 1., 187 | "mrcnn_mask_loss": 1. 188 | } 189 | 190 | # Use RPN ROIs or externally generated ROIs for training 191 | # Keep this True for most situations. Set to False if you want to train 192 | # the head branches on ROI generated by code rather than the ROIs from 193 | # the RPN. For example, to debug the classifier head without having to 194 | # train the RPN. 195 | USE_RPN_ROIS = True 196 | 197 | # Train or freeze batch normalization layers 198 | # None: Train BN layers. This is the normal mode 199 | # False: Freeze BN layers. Good when using a small batch size 200 | # True: (don't use). Set layer in training mode even when predicting 201 | TRAIN_BN = False # Defaulting to False since batch size is often small 202 | 203 | # Gradient norm clipping 204 | GRADIENT_CLIP_NORM = 5.0 205 | 206 | def __init__(self): 207 | """Set values of computed attributes.""" 208 | # Effective batch size 209 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 210 | 211 | # Input image size 212 | if self.IMAGE_RESIZE_MODE == "crop": 213 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, 3]) 214 | else: 215 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 216 | 217 | # Image meta data length 218 | # See compose_image_meta() for details 219 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 220 | 221 | def display(self): 222 | """Display Configuration values.""" 223 | print("\nConfigurations:") 224 | for a in dir(self): 225 | if not a.startswith("__") and not callable(getattr(self, a)): 226 | print("{:30} {}".format(a, getattr(self, a))) 227 | print("\n") 228 | -------------------------------------------------------------------------------- /pothole_base/pothole/mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # Concatenate or average outputs? 93 | # Outputs usually have a batch dimension and we concatenate 94 | # across it. If they don't, then the output is likely a loss 95 | # or a metric value that gets averaged across the batch. 96 | # Keras expects losses and metrics to be scalars. 97 | if K.int_shape(outputs[0]) == (): 98 | # Average 99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 100 | else: 101 | # Concatenate 102 | m = KL.Concatenate(axis=0, name=name)(outputs) 103 | merged.append(m) 104 | return merged 105 | 106 | 107 | if __name__ == "__main__": 108 | # Testing code below. It creates a simple model to train on MNIST and 109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 110 | # in TensorBoard. Run it as: 111 | # 112 | # python3 parallel_model.py 113 | 114 | import os 115 | import numpy as np 116 | import keras.optimizers 117 | from keras.datasets import mnist 118 | from keras.preprocessing.image import ImageDataGenerator 119 | 120 | GPU_COUNT = 2 121 | 122 | # Root directory of the project 123 | ROOT_DIR = os.path.abspath("../") 124 | 125 | # Directory to save logs and trained model 126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 127 | 128 | def build_model(x_train, num_classes): 129 | # Reset default graph. Keras leaves old ops in the graph, 130 | # which are ignored for execution but clutter graph 131 | # visualization in TensorBoard. 132 | tf.reset_default_graph() 133 | 134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 136 | name="conv1")(inputs) 137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 138 | name="conv2")(x) 139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 140 | x = KL.Flatten(name="flat1")(x) 141 | x = KL.Dense(128, activation='relu', name="dense1")(x) 142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 143 | 144 | return KM.Model(inputs, x, "digit_classifier_model") 145 | 146 | # Load MNIST Data 147 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 150 | 151 | print('x_train shape:', x_train.shape) 152 | print('x_test shape:', x_test.shape) 153 | 154 | # Build data generator and model 155 | datagen = ImageDataGenerator() 156 | model = build_model(x_train, 10) 157 | 158 | # Add multi-GPU support. 159 | model = ParallelModel(model, GPU_COUNT) 160 | 161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 162 | 163 | model.compile(loss='sparse_categorical_crossentropy', 164 | optimizer=optimizer, metrics=['accuracy']) 165 | 166 | model.summary() 167 | 168 | # Train 169 | model.fit_generator( 170 | datagen.flow(x_train, y_train, batch_size=64), 171 | steps_per_epoch=50, epochs=10, verbose=1, 172 | validation_data=(x_test, y_test), 173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 174 | write_graph=True)] 175 | ) 176 | -------------------------------------------------------------------------------- /pothole_base/pothole/mrcnn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy 17 | import skimage.color 18 | import skimage.io 19 | import skimage.transform 20 | import urllib.request 21 | import shutil 22 | import warnings 23 | 24 | # URL from which to download the latest COCO trained weights 25 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 26 | 27 | 28 | ############################################################ 29 | # Bounding Boxes 30 | ############################################################ 31 | 32 | def extract_bboxes(mask): 33 | """Compute bounding boxes from masks. 34 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 35 | 36 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 37 | """ 38 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 39 | for i in range(mask.shape[-1]): 40 | m = mask[:, :, i] 41 | # Bounding box. 42 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 43 | vertical_indicies = np.where(np.any(m, axis=1))[0] 44 | if horizontal_indicies.shape[0]: 45 | x1, x2 = horizontal_indicies[[0, -1]] 46 | y1, y2 = vertical_indicies[[0, -1]] 47 | # x2 and y2 should not be part of the box. Increment by 1. 48 | x2 += 1 49 | y2 += 1 50 | else: 51 | # No mask for this instance. Might happen due to 52 | # resizing or cropping. Set bbox to zeros 53 | x1, x2, y1, y2 = 0, 0, 0, 0 54 | boxes[i] = np.array([y1, x1, y2, x2]) 55 | return boxes.astype(np.int32) 56 | 57 | 58 | def compute_iou(box, boxes, box_area, boxes_area): 59 | """Calculates IoU of the given box with the array of the given boxes. 60 | box: 1D vector [y1, x1, y2, x2] 61 | boxes: [boxes_count, (y1, x1, y2, x2)] 62 | box_area: float. the area of 'box' 63 | boxes_area: array of length boxes_count. 64 | 65 | Note: the areas are passed in rather than calculated here for 66 | efficiency. Calculate once in the caller to avoid duplicate work. 67 | """ 68 | # Calculate intersection areas 69 | y1 = np.maximum(box[0], boxes[:, 0]) 70 | y2 = np.minimum(box[2], boxes[:, 2]) 71 | x1 = np.maximum(box[1], boxes[:, 1]) 72 | x2 = np.minimum(box[3], boxes[:, 3]) 73 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 74 | union = box_area + boxes_area[:] - intersection[:] 75 | iou = intersection / union 76 | return iou 77 | 78 | 79 | def compute_overlaps(boxes1, boxes2): 80 | """Computes IoU overlaps between two sets of boxes. 81 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 82 | 83 | For better performance, pass the largest set first and the smaller second. 84 | """ 85 | # Areas of anchors and GT boxes 86 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 87 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 88 | 89 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 90 | # Each cell contains the IoU value. 91 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 92 | for i in range(overlaps.shape[1]): 93 | box2 = boxes2[i] 94 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 95 | return overlaps 96 | 97 | 98 | def compute_overlaps_masks(masks1, masks2): 99 | """Computes IoU overlaps between two sets of masks. 100 | masks1, masks2: [Height, Width, instances] 101 | """ 102 | 103 | # If either set of masks is empty return empty result 104 | if masks1.shape[0] == 0 or masks2.shape[0] == 0: 105 | return np.zeros((masks1.shape[0], masks2.shape[-1])) 106 | # flatten masks and compute their areas 107 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 108 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 109 | area1 = np.sum(masks1, axis=0) 110 | area2 = np.sum(masks2, axis=0) 111 | 112 | # intersections and union 113 | intersections = np.dot(masks1.T, masks2) 114 | union = area1[:, None] + area2[None, :] - intersections 115 | overlaps = intersections / union 116 | 117 | return overlaps 118 | 119 | 120 | def non_max_suppression(boxes, scores, threshold): 121 | """Performs non-maximum suppression and returns indices of kept boxes. 122 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 123 | scores: 1-D array of box scores. 124 | threshold: Float. IoU threshold to use for filtering. 125 | """ 126 | assert boxes.shape[0] > 0 127 | if boxes.dtype.kind != "f": 128 | boxes = boxes.astype(np.float32) 129 | 130 | # Compute box areas 131 | y1 = boxes[:, 0] 132 | x1 = boxes[:, 1] 133 | y2 = boxes[:, 2] 134 | x2 = boxes[:, 3] 135 | area = (y2 - y1) * (x2 - x1) 136 | 137 | # Get indicies of boxes sorted by scores (highest first) 138 | ixs = scores.argsort()[::-1] 139 | 140 | pick = [] 141 | while len(ixs) > 0: 142 | # Pick top box and add its index to the list 143 | i = ixs[0] 144 | pick.append(i) 145 | # Compute IoU of the picked box with the rest 146 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 147 | # Identify boxes with IoU over the threshold. This 148 | # returns indices into ixs[1:], so add 1 to get 149 | # indices into ixs. 150 | remove_ixs = np.where(iou > threshold)[0] + 1 151 | # Remove indices of the picked and overlapped boxes. 152 | ixs = np.delete(ixs, remove_ixs) 153 | ixs = np.delete(ixs, 0) 154 | return np.array(pick, dtype=np.int32) 155 | 156 | 157 | def apply_box_deltas(boxes, deltas): 158 | """Applies the given deltas to the given boxes. 159 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 160 | deltas: [N, (dy, dx, log(dh), log(dw))] 161 | """ 162 | boxes = boxes.astype(np.float32) 163 | # Convert to y, x, h, w 164 | height = boxes[:, 2] - boxes[:, 0] 165 | width = boxes[:, 3] - boxes[:, 1] 166 | center_y = boxes[:, 0] + 0.5 * height 167 | center_x = boxes[:, 1] + 0.5 * width 168 | # Apply deltas 169 | center_y += deltas[:, 0] * height 170 | center_x += deltas[:, 1] * width 171 | height *= np.exp(deltas[:, 2]) 172 | width *= np.exp(deltas[:, 3]) 173 | # Convert back to y1, x1, y2, x2 174 | y1 = center_y - 0.5 * height 175 | x1 = center_x - 0.5 * width 176 | y2 = y1 + height 177 | x2 = x1 + width 178 | return np.stack([y1, x1, y2, x2], axis=1) 179 | 180 | 181 | def box_refinement_graph(box, gt_box): 182 | """Compute refinement needed to transform box to gt_box. 183 | box and gt_box are [N, (y1, x1, y2, x2)] 184 | """ 185 | box = tf.cast(box, tf.float32) 186 | gt_box = tf.cast(gt_box, tf.float32) 187 | 188 | height = box[:, 2] - box[:, 0] 189 | width = box[:, 3] - box[:, 1] 190 | center_y = box[:, 0] + 0.5 * height 191 | center_x = box[:, 1] + 0.5 * width 192 | 193 | gt_height = gt_box[:, 2] - gt_box[:, 0] 194 | gt_width = gt_box[:, 3] - gt_box[:, 1] 195 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 196 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 197 | 198 | dy = (gt_center_y - center_y) / height 199 | dx = (gt_center_x - center_x) / width 200 | dh = tf.log(gt_height / height) 201 | dw = tf.log(gt_width / width) 202 | 203 | result = tf.stack([dy, dx, dh, dw], axis=1) 204 | return result 205 | 206 | 207 | def box_refinement(box, gt_box): 208 | """Compute refinement needed to transform box to gt_box. 209 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 210 | assumed to be outside the box. 211 | """ 212 | box = box.astype(np.float32) 213 | gt_box = gt_box.astype(np.float32) 214 | 215 | height = box[:, 2] - box[:, 0] 216 | width = box[:, 3] - box[:, 1] 217 | center_y = box[:, 0] + 0.5 * height 218 | center_x = box[:, 1] + 0.5 * width 219 | 220 | gt_height = gt_box[:, 2] - gt_box[:, 0] 221 | gt_width = gt_box[:, 3] - gt_box[:, 1] 222 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 223 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 224 | 225 | dy = (gt_center_y - center_y) / height 226 | dx = (gt_center_x - center_x) / width 227 | dh = np.log(gt_height / height) 228 | dw = np.log(gt_width / width) 229 | 230 | return np.stack([dy, dx, dh, dw], axis=1) 231 | 232 | 233 | ############################################################ 234 | # Dataset 235 | ############################################################ 236 | 237 | class Dataset(object): 238 | """The base class for dataset classes. 239 | To use it, create a new class that adds functions specific to the dataset 240 | you want to use. For example: 241 | 242 | class CatsAndDogsDataset(Dataset): 243 | def load_cats_and_dogs(self): 244 | ... 245 | def load_mask(self, image_id): 246 | ... 247 | def image_reference(self, image_id): 248 | ... 249 | 250 | See COCODataset and ShapesDataset as examples. 251 | """ 252 | 253 | def __init__(self, class_map=None): 254 | self._image_ids = [] 255 | self.image_info = [] 256 | # Background is always the first class 257 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 258 | self.source_class_ids = {} 259 | 260 | def add_class(self, source, class_id, class_name): 261 | assert "." not in source, "Source name cannot contain a dot" 262 | # Does the class exist already? 263 | for info in self.class_info: 264 | if info['source'] == source and info["id"] == class_id: 265 | # source.class_id combination already available, skip 266 | return 267 | # Add the class 268 | self.class_info.append({ 269 | "source": source, 270 | "id": class_id, 271 | "name": class_name, 272 | }) 273 | 274 | def add_image(self, source, image_id, path, **kwargs): 275 | image_info = { 276 | "id": image_id, 277 | "source": source, 278 | "path": path, 279 | } 280 | image_info.update(kwargs) 281 | self.image_info.append(image_info) 282 | 283 | def image_reference(self, image_id): 284 | """Return a link to the image in its source Website or details about 285 | the image that help looking it up or debugging it. 286 | 287 | Override for your dataset, but pass to this function 288 | if you encounter images not in your dataset. 289 | """ 290 | return "" 291 | 292 | def prepare(self, class_map=None): 293 | """Prepares the Dataset class for use. 294 | 295 | TODO: class map is not supported yet. When done, it should handle mapping 296 | classes from different datasets to the same class ID. 297 | """ 298 | 299 | def clean_name(name): 300 | """Returns a shorter version of object names for cleaner display.""" 301 | return ",".join(name.split(",")[:1]) 302 | 303 | # Build (or rebuild) everything else from the info dicts. 304 | self.num_classes = len(self.class_info) 305 | self.class_ids = np.arange(self.num_classes) 306 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 307 | self.num_images = len(self.image_info) 308 | self._image_ids = np.arange(self.num_images) 309 | 310 | # Mapping from source class and image IDs to internal IDs 311 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 312 | for info, id in zip(self.class_info, self.class_ids)} 313 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id 314 | for info, id in zip(self.image_info, self.image_ids)} 315 | 316 | # Map sources to class_ids they support 317 | self.sources = list(set([i['source'] for i in self.class_info])) 318 | self.source_class_ids = {} 319 | # Loop over datasets 320 | for source in self.sources: 321 | self.source_class_ids[source] = [] 322 | # Find classes that belong to this dataset 323 | for i, info in enumerate(self.class_info): 324 | # Include BG class in all datasets 325 | if i == 0 or source == info['source']: 326 | self.source_class_ids[source].append(i) 327 | 328 | def map_source_class_id(self, source_class_id): 329 | """Takes a source class ID and returns the int class ID assigned to it. 330 | 331 | For example: 332 | dataset.map_source_class_id("coco.12") -> 23 333 | """ 334 | return self.class_from_source_map[source_class_id] 335 | 336 | def get_source_class_id(self, class_id, source): 337 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 338 | info = self.class_info[class_id] 339 | assert info['source'] == source 340 | return info['id'] 341 | 342 | def append_data(self, class_info, image_info): 343 | self.external_to_class_id = {} 344 | for i, c in enumerate(self.class_info): 345 | for ds, id in c["map"]: 346 | self.external_to_class_id[ds + str(id)] = i 347 | 348 | # Map external image IDs to internal ones. 349 | self.external_to_image_id = {} 350 | for i, info in enumerate(self.image_info): 351 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 352 | 353 | @property 354 | def image_ids(self): 355 | return self._image_ids 356 | 357 | def source_image_link(self, image_id): 358 | """Returns the path or URL to the image. 359 | Override this to return a URL to the image if it's available online for easy 360 | debugging. 361 | """ 362 | return self.image_info[image_id]["path"] 363 | 364 | def load_image(self, image_id): 365 | """Load the specified image and return a [H,W,3] Numpy array. 366 | """ 367 | # Load image 368 | image = skimage.io.imread(self.image_info[image_id]['path']) 369 | # If grayscale. Convert to RGB for consistency. 370 | if image.ndim != 3: 371 | image = skimage.color.gray2rgb(image) 372 | # If has an alpha channel, remove it for consistency 373 | if image.shape[-1] == 4: 374 | image = image[..., :3] 375 | return image 376 | 377 | def load_mask(self, image_id): 378 | """Load instance masks for the given image. 379 | 380 | Different datasets use different ways to store masks. Override this 381 | method to load instance masks and return them in the form of am 382 | array of binary masks of shape [height, width, instances]. 383 | 384 | Returns: 385 | masks: A bool array of shape [height, width, instance count] with 386 | a binary mask per instance. 387 | class_ids: a 1D array of class IDs of the instance masks. 388 | """ 389 | # Override this function to load a mask from your dataset. 390 | # Otherwise, it returns an empty mask. 391 | mask = np.empty([0, 0, 0]) 392 | class_ids = np.empty([0], np.int32) 393 | return mask, class_ids 394 | 395 | 396 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): 397 | """Resizes an image keeping the aspect ratio unchanged. 398 | 399 | min_dim: if provided, resizes the image such that it's smaller 400 | dimension == min_dim 401 | max_dim: if provided, ensures that the image longest side doesn't 402 | exceed this value. 403 | min_scale: if provided, ensure that the image is scaled up by at least 404 | this percent even if min_dim doesn't require it. 405 | mode: Resizing mode. 406 | none: No resizing. Return the image unchanged. 407 | square: Resize and pad with zeros to get a square image 408 | of size [max_dim, max_dim]. 409 | pad64: Pads width and height with zeros to make them multiples of 64. 410 | If min_dim or min_scale are provided, it scales the image up 411 | before padding. max_dim is ignored in this mode. 412 | The multiple of 64 is needed to ensure smooth scaling of feature 413 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 414 | crop: Picks random crops from the image. First, scales the image based 415 | on min_dim and min_scale, then picks a random crop of 416 | size min_dim x min_dim. Can be used in training only. 417 | max_dim is not used in this mode. 418 | 419 | Returns: 420 | image: the resized image 421 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 422 | be inserted in the returned image. If so, this window is the 423 | coordinates of the image part of the full image (excluding 424 | the padding). The x2, y2 pixels are not included. 425 | scale: The scale factor used to resize the image 426 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 427 | """ 428 | # Keep track of image dtype and return results in the same dtype 429 | image_dtype = image.dtype 430 | # Default window (y1, x1, y2, x2) and default scale == 1. 431 | h, w = image.shape[:2] 432 | window = (0, 0, h, w) 433 | scale = 1 434 | padding = [(0, 0), (0, 0), (0, 0)] 435 | crop = None 436 | 437 | if mode == "none": 438 | return image, window, scale, padding, crop 439 | 440 | # Scale? 441 | if min_dim: 442 | # Scale up but not down 443 | scale = max(1, min_dim / min(h, w)) 444 | if min_scale and scale < min_scale: 445 | scale = min_scale 446 | 447 | # Does it exceed max dim? 448 | if max_dim and mode == "square": 449 | image_max = max(h, w) 450 | if round(image_max * scale) > max_dim: 451 | scale = max_dim / image_max 452 | 453 | # Resize image using bilinear interpolation 454 | if scale != 1: 455 | image = skimage.transform.resize( 456 | image, (round(h * scale), round(w * scale)), 457 | order=1, mode="constant", preserve_range=True) 458 | 459 | # Need padding or cropping? 460 | if mode == "square": 461 | # Get new height and width 462 | h, w = image.shape[:2] 463 | top_pad = (max_dim - h) // 2 464 | bottom_pad = max_dim - h - top_pad 465 | left_pad = (max_dim - w) // 2 466 | right_pad = max_dim - w - left_pad 467 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 468 | image = np.pad(image, padding, mode='constant', constant_values=0) 469 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 470 | elif mode == "pad64": 471 | h, w = image.shape[:2] 472 | # Both sides must be divisible by 64 473 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 474 | # Height 475 | if h % 64 > 0: 476 | max_h = h - (h % 64) + 64 477 | top_pad = (max_h - h) // 2 478 | bottom_pad = max_h - h - top_pad 479 | else: 480 | top_pad = bottom_pad = 0 481 | # Width 482 | if w % 64 > 0: 483 | max_w = w - (w % 64) + 64 484 | left_pad = (max_w - w) // 2 485 | right_pad = max_w - w - left_pad 486 | else: 487 | left_pad = right_pad = 0 488 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 489 | image = np.pad(image, padding, mode='constant', constant_values=0) 490 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 491 | elif mode == "crop": 492 | # Pick a random crop 493 | h, w = image.shape[:2] 494 | y = random.randint(0, (h - min_dim)) 495 | x = random.randint(0, (w - min_dim)) 496 | crop = (y, x, min_dim, min_dim) 497 | image = image[y:y + min_dim, x:x + min_dim] 498 | window = (0, 0, min_dim, min_dim) 499 | else: 500 | raise Exception("Mode {} not supported".format(mode)) 501 | return image.astype(image_dtype), window, scale, padding, crop 502 | 503 | 504 | def resize_mask(mask, scale, padding, crop=None): 505 | """Resizes a mask using the given scale and padding. 506 | Typically, you get the scale and padding from resize_image() to 507 | ensure both, the image and the mask, are resized consistently. 508 | 509 | scale: mask scaling factor 510 | padding: Padding to add to the mask in the form 511 | [(top, bottom), (left, right), (0, 0)] 512 | """ 513 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 514 | # calculated with round() instead of int() 515 | with warnings.catch_warnings(): 516 | warnings.simplefilter("ignore") 517 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 518 | if crop is not None: 519 | y, x, h, w = crop 520 | mask = mask[y:y + h, x:x + w] 521 | else: 522 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 523 | return mask 524 | 525 | 526 | def minimize_mask(bbox, mask, mini_shape): 527 | """Resize masks to a smaller version to reduce memory load. 528 | Mini-masks can be resized back to image scale using expand_masks() 529 | 530 | See inspect_data.ipynb notebook for more details. 531 | """ 532 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 533 | for i in range(mask.shape[-1]): 534 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 535 | m = mask[:, :, i].astype(bool) 536 | y1, x1, y2, x2 = bbox[i][:4] 537 | m = m[y1:y2, x1:x2] 538 | if m.size == 0: 539 | raise Exception("Invalid bounding box with area of zero") 540 | # Resize with bilinear interpolation 541 | m = skimage.transform.resize(m, mini_shape, order=1, mode="constant") 542 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 543 | return mini_mask 544 | 545 | 546 | def expand_mask(bbox, mini_mask, image_shape): 547 | """Resizes mini masks back to image size. Reverses the change 548 | of minimize_mask(). 549 | 550 | See inspect_data.ipynb notebook for more details. 551 | """ 552 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 553 | for i in range(mask.shape[-1]): 554 | m = mini_mask[:, :, i] 555 | y1, x1, y2, x2 = bbox[i][:4] 556 | h = y2 - y1 557 | w = x2 - x1 558 | # Resize with bilinear interpolation 559 | m = skimage.transform.resize(m, (h, w), order=1, mode="constant") 560 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 561 | return mask 562 | 563 | 564 | # TODO: Build and use this function to reduce code duplication 565 | def mold_mask(mask, config): 566 | pass 567 | 568 | 569 | def unmold_mask(mask, bbox, image_shape): 570 | """Converts a mask generated by the neural network to a format similar 571 | to its original shape. 572 | mask: [height, width] of type float. A small, typically 28x28 mask. 573 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 574 | 575 | Returns a binary mask with the same size as the original image. 576 | """ 577 | threshold = 0.5 578 | y1, x1, y2, x2 = bbox 579 | mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1, mode="constant") 580 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 581 | 582 | # Put the mask in the right location. 583 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 584 | full_mask[y1:y2, x1:x2] = mask 585 | return full_mask 586 | 587 | 588 | ############################################################ 589 | # Anchors 590 | ############################################################ 591 | 592 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 593 | """ 594 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 595 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 596 | shape: [height, width] spatial shape of the feature map over which 597 | to generate anchors. 598 | feature_stride: Stride of the feature map relative to the image in pixels. 599 | anchor_stride: Stride of anchors on the feature map. For example, if the 600 | value is 2 then generate anchors for every other feature map pixel. 601 | """ 602 | # Get all combinations of scales and ratios 603 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 604 | scales = scales.flatten() 605 | ratios = ratios.flatten() 606 | 607 | # Enumerate heights and widths from scales and ratios 608 | heights = scales / np.sqrt(ratios) 609 | widths = scales * np.sqrt(ratios) 610 | 611 | # Enumerate shifts in feature space 612 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 613 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 614 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 615 | 616 | # Enumerate combinations of shifts, widths, and heights 617 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 618 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 619 | 620 | # Reshape to get a list of (y, x) and a list of (h, w) 621 | box_centers = np.stack( 622 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 623 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 624 | 625 | # Convert to corner coordinates (y1, x1, y2, x2) 626 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 627 | box_centers + 0.5 * box_sizes], axis=1) 628 | return boxes 629 | 630 | 631 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 632 | anchor_stride): 633 | """Generate anchors at different levels of a feature pyramid. Each scale 634 | is associated with a level of the pyramid, but each ratio is used in 635 | all levels of the pyramid. 636 | 637 | Returns: 638 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 639 | with the same order of the given scales. So, anchors of scale[0] come 640 | first, then anchors of scale[1], and so on. 641 | """ 642 | # Anchors 643 | # [anchor_count, (y1, x1, y2, x2)] 644 | anchors = [] 645 | for i in range(len(scales)): 646 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 647 | feature_strides[i], anchor_stride)) 648 | return np.concatenate(anchors, axis=0) 649 | 650 | 651 | ############################################################ 652 | # Miscellaneous 653 | ############################################################ 654 | 655 | def trim_zeros(x): 656 | """It's common to have tensors larger than the available data and 657 | pad with zeros. This function removes rows that are all zeros. 658 | 659 | x: [rows, columns]. 660 | """ 661 | assert len(x.shape) == 2 662 | return x[~np.all(x == 0, axis=1)] 663 | 664 | 665 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 666 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 667 | iou_threshold=0.5, score_threshold=0.0): 668 | """Finds matches between prediction and ground truth instances. 669 | 670 | Returns: 671 | gt_match: 1-D array. For each GT box it has the index of the matched 672 | predicted box. 673 | pred_match: 1-D array. For each predicted box, it has the index of 674 | the matched ground truth box. 675 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 676 | """ 677 | # Trim zero padding 678 | # TODO: cleaner to do zero unpadding upstream 679 | gt_boxes = trim_zeros(gt_boxes) 680 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 681 | pred_boxes = trim_zeros(pred_boxes) 682 | pred_scores = pred_scores[:pred_boxes.shape[0]] 683 | # Sort predictions by score from high to low 684 | indices = np.argsort(pred_scores)[::-1] 685 | pred_boxes = pred_boxes[indices] 686 | pred_class_ids = pred_class_ids[indices] 687 | pred_scores = pred_scores[indices] 688 | pred_masks = pred_masks[..., indices] 689 | 690 | # Compute IoU overlaps [pred_masks, gt_masks] 691 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 692 | 693 | # Loop through predictions and find matching ground truth boxes 694 | match_count = 0 695 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 696 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 697 | for i in range(len(pred_boxes)): 698 | # Find best matching ground truth box 699 | # 1. Sort matches by score 700 | sorted_ixs = np.argsort(overlaps[i])[::-1] 701 | # 2. Remove low scores 702 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 703 | if low_score_idx.size > 0: 704 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 705 | # 3. Find the match 706 | for j in sorted_ixs: 707 | # If ground truth box is already matched, go to next one 708 | if gt_match[j] > 0: 709 | continue 710 | # If we reach IoU smaller than the threshold, end the loop 711 | iou = overlaps[i, j] 712 | if iou < iou_threshold: 713 | break 714 | # Do we have a match? 715 | if pred_class_ids[i] == gt_class_ids[j]: 716 | match_count += 1 717 | gt_match[j] = i 718 | pred_match[i] = j 719 | break 720 | 721 | return gt_match, pred_match, overlaps 722 | 723 | 724 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 725 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 726 | iou_threshold=0.5): 727 | """Compute Average Precision at a set IoU threshold (default 0.5). 728 | 729 | Returns: 730 | mAP: Mean Average Precision 731 | precisions: List of precisions at different class score thresholds. 732 | recalls: List of recall values at different class score thresholds. 733 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 734 | """ 735 | # Get matches and overlaps 736 | gt_match, pred_match, overlaps = compute_matches( 737 | gt_boxes, gt_class_ids, gt_masks, 738 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 739 | iou_threshold) 740 | 741 | # Compute precision and recall at each prediction box step 742 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 743 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 744 | 745 | # Pad with start and end values to simplify the math 746 | precisions = np.concatenate([[0], precisions, [0]]) 747 | recalls = np.concatenate([[0], recalls, [1]]) 748 | 749 | # Ensure precision values decrease but don't increase. This way, the 750 | # precision value at each recall threshold is the maximum it can be 751 | # for all following recall thresholds, as specified by the VOC paper. 752 | for i in range(len(precisions) - 2, -1, -1): 753 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 754 | 755 | # Compute mean AP over recall range 756 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 757 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 758 | precisions[indices]) 759 | 760 | return mAP, precisions, recalls, overlaps 761 | 762 | 763 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 764 | pred_box, pred_class_id, pred_score, pred_mask, 765 | iou_thresholds=None, verbose=1): 766 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 767 | # Default is 0.5 to 0.95 with increments of 0.05 768 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 769 | 770 | # Compute AP over range of IoU thresholds 771 | AP = [] 772 | for iou_threshold in iou_thresholds: 773 | ap, precisions, recalls, overlaps =\ 774 | compute_ap(gt_box, gt_class_id, gt_mask, 775 | pred_box, pred_class_id, pred_score, pred_mask, 776 | iou_threshold=iou_threshold) 777 | if verbose: 778 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 779 | AP.append(ap) 780 | AP = np.array(AP).mean() 781 | if verbose: 782 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 783 | iou_thresholds[0], iou_thresholds[-1], AP)) 784 | return AP 785 | 786 | 787 | def compute_recall(pred_boxes, gt_boxes, iou): 788 | """Compute the recall at the given IoU threshold. It's an indication 789 | of how many GT boxes were found by the given prediction boxes. 790 | 791 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 792 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 793 | """ 794 | # Measure overlaps 795 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 796 | iou_max = np.max(overlaps, axis=1) 797 | iou_argmax = np.argmax(overlaps, axis=1) 798 | positive_ids = np.where(iou_max >= iou)[0] 799 | matched_gt_boxes = iou_argmax[positive_ids] 800 | 801 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 802 | return recall, positive_ids 803 | 804 | 805 | # ## Batch Slicing 806 | # Some custom layers support a batch size of 1 only, and require a lot of work 807 | # to support batches greater than 1. This function slices an input tensor 808 | # across the batch dimension and feeds batches of size 1. Effectively, 809 | # an easy way to support batches > 1 quickly with little code modification. 810 | # In the long run, it's more efficient to modify the code to support large 811 | # batches and getting rid of this function. Consider this a temporary solution 812 | def batch_slice(inputs, graph_fn, batch_size, names=None): 813 | """Splits inputs into slices and feeds each slice to a copy of the given 814 | computation graph and then combines the results. It allows you to run a 815 | graph on a batch of inputs even if the graph is written to support one 816 | instance only. 817 | 818 | inputs: list of tensors. All must have the same first dimension length 819 | graph_fn: A function that returns a TF tensor that's part of a graph. 820 | batch_size: number of slices to divide the data into. 821 | names: If provided, assigns names to the resulting tensors. 822 | """ 823 | if not isinstance(inputs, list): 824 | inputs = [inputs] 825 | 826 | outputs = [] 827 | for i in range(batch_size): 828 | inputs_slice = [x[i] for x in inputs] 829 | output_slice = graph_fn(*inputs_slice) 830 | if not isinstance(output_slice, (tuple, list)): 831 | output_slice = [output_slice] 832 | outputs.append(output_slice) 833 | # Change outputs from a list of slices where each is 834 | # a list of outputs to a list of outputs and each has 835 | # a list of slices 836 | outputs = list(zip(*outputs)) 837 | 838 | if names is None: 839 | names = [None] * len(outputs) 840 | 841 | result = [tf.stack(o, axis=0, name=n) 842 | for o, n in zip(outputs, names)] 843 | if len(result) == 1: 844 | result = result[0] 845 | 846 | return result 847 | 848 | 849 | def download_trained_weights(coco_model_path, verbose=1): 850 | """Download COCO trained weights from Releases. 851 | 852 | coco_model_path: local path of COCO trained weights 853 | """ 854 | if verbose > 0: 855 | print("Downloading pretrained model to " + coco_model_path + " ...") 856 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 857 | shutil.copyfileobj(resp, out) 858 | if verbose > 0: 859 | print("... done downloading pretrained model!") 860 | 861 | 862 | def norm_boxes(boxes, shape): 863 | """Converts boxes from pixel coordinates to normalized coordinates. 864 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 865 | shape: [..., (height, width)] in pixels 866 | 867 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 868 | coordinates it's inside the box. 869 | 870 | Returns: 871 | [N, (y1, x1, y2, x2)] in normalized coordinates 872 | """ 873 | h, w = shape 874 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 875 | shift = np.array([0, 0, 1, 1]) 876 | return np.divide((boxes - shift), scale).astype(np.float32) 877 | 878 | 879 | def denorm_boxes(boxes, shape): 880 | """Converts boxes from normalized coordinates to pixel coordinates. 881 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 882 | shape: [..., (height, width)] in pixels 883 | 884 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 885 | coordinates it's inside the box. 886 | 887 | Returns: 888 | [N, (y1, x1, y2, x2)] in pixel coordinates 889 | """ 890 | h, w = shape 891 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 892 | shift = np.array([0, 0, 1, 1]) 893 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 894 | -------------------------------------------------------------------------------- /pothole_base/pothole/mrcnn/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import sys 12 | import random 13 | import itertools 14 | import colorsys 15 | 16 | import numpy as np 17 | from skimage.measure import find_contours 18 | import matplotlib.pyplot as plt 19 | from matplotlib import patches, lines 20 | from matplotlib.patches import Polygon 21 | import IPython.display 22 | 23 | # Root directory of the project 24 | ROOT_DIR = os.path.abspath("../") 25 | 26 | # Import Mask RCNN 27 | sys.path.append(ROOT_DIR) # To find local version of the library 28 | from mrcnn import utils 29 | 30 | 31 | ############################################################ 32 | # Visualization 33 | ############################################################ 34 | 35 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 36 | interpolation=None): 37 | """Display the given set of images, optionally with titles. 38 | images: list or array of image tensors in HWC format. 39 | titles: optional. A list of titles to display with each image. 40 | cols: number of images per row 41 | cmap: Optional. Color map to use. For example, "Blues". 42 | norm: Optional. A Normalize instance to map values to colors. 43 | interpolation: Optional. Image interpolation to use for display. 44 | """ 45 | titles = titles if titles is not None else [""] * len(images) 46 | rows = len(images) // cols + 1 47 | plt.figure(figsize=(14, 14 * rows // cols)) 48 | i = 1 49 | for image, title in zip(images, titles): 50 | plt.subplot(rows, cols, i) 51 | plt.title(title, fontsize=9) 52 | plt.axis('off') 53 | plt.imshow(image.astype(np.uint8), cmap=cmap, 54 | norm=norm, interpolation=interpolation) 55 | i += 1 56 | plt.show() 57 | 58 | 59 | def random_colors(N, bright=True): 60 | """ 61 | Generate random colors. 62 | To get visually distinct colors, generate them in HSV space then 63 | convert to RGB. 64 | """ 65 | brightness = 1.0 if bright else 0.7 66 | hsv = [(i / N, 1, brightness) for i in range(N)] 67 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 68 | random.shuffle(colors) 69 | return colors 70 | 71 | 72 | def apply_mask(image, mask, color, alpha=0.5): 73 | """Apply the given mask to the image. 74 | """ 75 | for c in range(3): 76 | image[:, :, c] = np.where(mask == 1, 77 | image[:, :, c] * 78 | (1 - alpha) + alpha * color[c] * 255, 79 | image[:, :, c]) 80 | return image 81 | 82 | 83 | def display_instances(image, boxes, masks, class_ids, class_names, 84 | scores=None, title="", 85 | figsize=(16, 16), ax=None, 86 | show_mask=True, show_bbox=True, 87 | colors=None, captions=None): 88 | """ 89 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 90 | masks: [height, width, num_instances] 91 | class_ids: [num_instances] 92 | class_names: list of class names of the dataset 93 | scores: (optional) confidence scores for each box 94 | title: (optional) Figure title 95 | show_mask, show_bbox: To show masks and bounding boxes or not 96 | figsize: (optional) the size of the image 97 | colors: (optional) An array or colors to use with each object 98 | captions: (optional) A list of strings to use as captions for each object 99 | """ 100 | # Number of instances 101 | N = boxes.shape[0] 102 | if not N: 103 | print("\n*** No instances to display *** \n") 104 | else: 105 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 106 | 107 | # If no axis is passed, create one and automatically call show() 108 | auto_show = False 109 | if not ax: 110 | _, ax = plt.subplots(1, figsize=figsize) 111 | auto_show = True 112 | 113 | # Generate random colors 114 | colors = colors or random_colors(N) 115 | 116 | # Show area outside image boundaries. 117 | height, width = image.shape[:2] 118 | ax.set_ylim(height + 10, -10) 119 | ax.set_xlim(-10, width + 10) 120 | ax.axis('off') 121 | ax.set_title(title) 122 | 123 | masked_image = image.astype(np.uint32).copy() 124 | for i in range(N): 125 | color = colors[i] 126 | 127 | # Bounding box 128 | if not np.any(boxes[i]): 129 | # Skip this instance. Has no bbox. Likely lost in image cropping. 130 | continue 131 | y1, x1, y2, x2 = boxes[i] 132 | if show_bbox: 133 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 134 | alpha=0.7, linestyle="dashed", 135 | edgecolor=color, facecolor='none') 136 | ax.add_patch(p) 137 | 138 | # Label 139 | if not captions: 140 | class_id = class_ids[i] 141 | score = scores[i] if scores is not None else None 142 | label = class_names[class_id] 143 | x = random.randint(x1, (x1 + x2) // 2) 144 | caption = "{} {:.3f}".format(label, score) if score else label 145 | else: 146 | caption = captions[i] 147 | ax.text(x1, y1 + 8, caption, 148 | color='w', size=11, backgroundcolor="none") 149 | 150 | # Mask 151 | mask = masks[:, :, i] 152 | if show_mask: 153 | masked_image = apply_mask(masked_image, mask, color) 154 | 155 | # Mask Polygon 156 | # Pad to ensure proper polygons for masks that touch image edges. 157 | padded_mask = np.zeros( 158 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 159 | padded_mask[1:-1, 1:-1] = mask 160 | contours = find_contours(padded_mask, 0.5) 161 | for verts in contours: 162 | # Subtract the padding and flip (y, x) to (x, y) 163 | verts = np.fliplr(verts) - 1 164 | p = Polygon(verts, facecolor="none", edgecolor=color) 165 | ax.add_patch(p) 166 | ax.imshow(masked_image.astype(np.uint8)) 167 | if auto_show: 168 | plt.show() 169 | 170 | 171 | def display_differences(image, 172 | gt_box, gt_class_id, gt_mask, 173 | pred_box, pred_class_id, pred_score, pred_mask, 174 | class_names, title="", ax=None, 175 | show_mask=True, show_box=True, 176 | iou_threshold=0.5, score_threshold=0.5): 177 | """Display ground truth and prediction instances on the same image.""" 178 | # Match predictions to ground truth 179 | gt_match, pred_match, overlaps = utils.compute_matches( 180 | gt_box, gt_class_id, gt_mask, 181 | pred_box, pred_class_id, pred_score, pred_mask, 182 | iou_threshold=iou_threshold, score_threshold=score_threshold) 183 | # Ground truth = green. Predictions = red 184 | colors = [(0, 1, 0, .8)] * len(gt_match)\ 185 | + [(1, 0, 0, 1)] * len(pred_match) 186 | # Concatenate GT and predictions 187 | class_ids = np.concatenate([gt_class_id, pred_class_id]) 188 | scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) 189 | boxes = np.concatenate([gt_box, pred_box]) 190 | masks = np.concatenate([gt_mask, pred_mask], axis=-1) 191 | # Captions per instance show score/IoU 192 | captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format( 193 | pred_score[i], 194 | (overlaps[i, int(pred_match[i])] 195 | if pred_match[i] > -1 else overlaps[i].max())) 196 | for i in range(len(pred_match))] 197 | # Set title if not provided 198 | title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" 199 | # Display 200 | display_instances( 201 | image, 202 | boxes, masks, class_ids, 203 | class_names, scores, ax=ax, 204 | show_bbox=show_box, show_mask=show_mask, 205 | colors=colors, captions=captions, 206 | title=title) 207 | 208 | 209 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 210 | """ 211 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 212 | proposals: [n, 4] the same anchors but refined to fit objects better. 213 | """ 214 | masked_image = image.copy() 215 | 216 | # Pick random anchors in case there are too many. 217 | ids = np.arange(rois.shape[0], dtype=np.int32) 218 | ids = np.random.choice( 219 | ids, limit, replace=False) if ids.shape[0] > limit else ids 220 | 221 | fig, ax = plt.subplots(1, figsize=(12, 12)) 222 | if rois.shape[0] > limit: 223 | plt.title("Showing {} random ROIs out of {}".format( 224 | len(ids), rois.shape[0])) 225 | else: 226 | plt.title("{} ROIs".format(len(ids))) 227 | 228 | # Show area outside image boundaries. 229 | ax.set_ylim(image.shape[0] + 20, -20) 230 | ax.set_xlim(-50, image.shape[1] + 20) 231 | ax.axis('off') 232 | 233 | for i, id in enumerate(ids): 234 | color = np.random.rand(3) 235 | class_id = class_ids[id] 236 | # ROI 237 | y1, x1, y2, x2 = rois[id] 238 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 239 | edgecolor=color if class_id else "gray", 240 | facecolor='none', linestyle="dashed") 241 | ax.add_patch(p) 242 | # Refined ROI 243 | if class_id: 244 | ry1, rx1, ry2, rx2 = refined_rois[id] 245 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 246 | edgecolor=color, facecolor='none') 247 | ax.add_patch(p) 248 | # Connect the top-left corners of the anchor and proposal for easy visualization 249 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 250 | 251 | # Label 252 | label = class_names[class_id] 253 | ax.text(rx1, ry1 + 8, "{}".format(label), 254 | color='w', size=11, backgroundcolor="none") 255 | 256 | # Mask 257 | m = utils.unmold_mask(mask[id], rois[id] 258 | [:4].astype(np.int32), image.shape) 259 | masked_image = apply_mask(masked_image, m, color) 260 | 261 | ax.imshow(masked_image) 262 | 263 | # Print stats 264 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 265 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 266 | print("Positive Ratio: {:.2f}".format( 267 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 268 | 269 | 270 | # TODO: Replace with matplotlib equivalent? 271 | def draw_box(image, box, color): 272 | """Draw 3-pixel width bounding boxes on the given image array. 273 | color: list of 3 int values for RGB. 274 | """ 275 | y1, x1, y2, x2 = box 276 | image[y1:y1 + 2, x1:x2] = color 277 | image[y2:y2 + 2, x1:x2] = color 278 | image[y1:y2, x1:x1 + 2] = color 279 | image[y1:y2, x2:x2 + 2] = color 280 | return image 281 | 282 | 283 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 284 | """Display the given image and the top few class masks.""" 285 | to_display = [] 286 | titles = [] 287 | to_display.append(image) 288 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 289 | # Pick top prominent classes in this image 290 | unique_class_ids = np.unique(class_ids) 291 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 292 | for i in unique_class_ids] 293 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 294 | key=lambda r: r[1], reverse=True) if v[1] > 0] 295 | # Generate images and titles 296 | for i in range(limit): 297 | class_id = top_ids[i] if i < len(top_ids) else -1 298 | # Pull masks of instances belonging to the same class. 299 | m = mask[:, :, np.where(class_ids == class_id)[0]] 300 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 301 | to_display.append(m) 302 | titles.append(class_names[class_id] if class_id != -1 else "-") 303 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 304 | 305 | 306 | def plot_precision_recall(AP, precisions, recalls): 307 | """Draw the precision-recall curve. 308 | 309 | AP: Average precision at IoU >= 0.5 310 | precisions: list of precision values 311 | recalls: list of recall values 312 | """ 313 | # Plot the Precision-Recall curve 314 | _, ax = plt.subplots(1) 315 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) 316 | ax.set_ylim(0, 1.1) 317 | ax.set_xlim(0, 1.1) 318 | _ = ax.plot(recalls, precisions) 319 | 320 | 321 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 322 | overlaps, class_names, threshold=0.5): 323 | """Draw a grid showing how ground truth objects are classified. 324 | gt_class_ids: [N] int. Ground truth class IDs 325 | pred_class_id: [N] int. Predicted class IDs 326 | pred_scores: [N] float. The probability scores of predicted classes 327 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 328 | class_names: list of all class names in the dataset 329 | threshold: Float. The prediction probability required to predict a class 330 | """ 331 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 332 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 333 | 334 | plt.figure(figsize=(12, 10)) 335 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 336 | plt.yticks(np.arange(len(pred_class_ids)), 337 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 338 | for i, id in enumerate(pred_class_ids)]) 339 | plt.xticks(np.arange(len(gt_class_ids)), 340 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 341 | 342 | thresh = overlaps.max() / 2. 343 | for i, j in itertools.product(range(overlaps.shape[0]), 344 | range(overlaps.shape[1])): 345 | text = "" 346 | if overlaps[i, j] > threshold: 347 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 348 | color = ("white" if overlaps[i, j] > thresh 349 | else "black" if overlaps[i, j] > 0 350 | else "grey") 351 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 352 | horizontalalignment="center", verticalalignment="center", 353 | fontsize=9, color=color) 354 | 355 | plt.tight_layout() 356 | plt.xlabel("Ground Truth") 357 | plt.ylabel("Predictions") 358 | 359 | 360 | def draw_boxes(image, boxes=None, refined_boxes=None, 361 | masks=None, captions=None, visibilities=None, 362 | title="", ax=None): 363 | """Draw bounding boxes and segmentation masks with different 364 | customizations. 365 | 366 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 367 | refined_boxes: Like boxes, but draw with solid lines to show 368 | that they're the result of refining 'boxes'. 369 | masks: [N, height, width] 370 | captions: List of N titles to display on each box 371 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 372 | prominent each bounding box should be. 373 | title: An optional title to show over the image 374 | ax: (optional) Matplotlib axis to draw on. 375 | """ 376 | # Number of boxes 377 | assert boxes is not None or refined_boxes is not None 378 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 379 | 380 | # Matplotlib Axis 381 | if not ax: 382 | _, ax = plt.subplots(1, figsize=(12, 12)) 383 | 384 | # Generate random colors 385 | colors = random_colors(N) 386 | 387 | # Show area outside image boundaries. 388 | margin = image.shape[0] // 10 389 | ax.set_ylim(image.shape[0] + margin, -margin) 390 | ax.set_xlim(-margin, image.shape[1] + margin) 391 | ax.axis('off') 392 | 393 | ax.set_title(title) 394 | 395 | masked_image = image.astype(np.uint32).copy() 396 | for i in range(N): 397 | # Box visibility 398 | visibility = visibilities[i] if visibilities is not None else 1 399 | if visibility == 0: 400 | color = "gray" 401 | style = "dotted" 402 | alpha = 0.5 403 | elif visibility == 1: 404 | color = colors[i] 405 | style = "dotted" 406 | alpha = 1 407 | elif visibility == 2: 408 | color = colors[i] 409 | style = "solid" 410 | alpha = 1 411 | 412 | # Boxes 413 | if boxes is not None: 414 | if not np.any(boxes[i]): 415 | # Skip this instance. Has no bbox. Likely lost in cropping. 416 | continue 417 | y1, x1, y2, x2 = boxes[i] 418 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 419 | alpha=alpha, linestyle=style, 420 | edgecolor=color, facecolor='none') 421 | ax.add_patch(p) 422 | 423 | # Refined boxes 424 | if refined_boxes is not None and visibility > 0: 425 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 426 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 427 | edgecolor=color, facecolor='none') 428 | ax.add_patch(p) 429 | # Connect the top-left corners of the anchor and proposal 430 | if boxes is not None: 431 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 432 | 433 | # Captions 434 | if captions is not None: 435 | caption = captions[i] 436 | # If there are refined boxes, display captions on them 437 | if refined_boxes is not None: 438 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 439 | x = random.randint(x1, (x1 + x2) // 2) 440 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 441 | color='w', backgroundcolor="none", 442 | bbox={'facecolor': color, 'alpha': 0.5, 443 | 'pad': 2, 'edgecolor': 'none'}) 444 | 445 | # Masks 446 | if masks is not None: 447 | mask = masks[:, :, i] 448 | masked_image = apply_mask(masked_image, mask, color) 449 | # Mask Polygon 450 | # Pad to ensure proper polygons for masks that touch image edges. 451 | padded_mask = np.zeros( 452 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 453 | padded_mask[1:-1, 1:-1] = mask 454 | contours = find_contours(padded_mask, 0.5) 455 | for verts in contours: 456 | # Subtract the padding and flip (y, x) to (x, y) 457 | verts = np.fliplr(verts) - 1 458 | p = Polygon(verts, facecolor="none", edgecolor=color) 459 | ax.add_patch(p) 460 | ax.imshow(masked_image.astype(np.uint8)) 461 | 462 | 463 | def display_table(table): 464 | """Display values in a table format. 465 | table: an iterable of rows, and each row is an iterable of values. 466 | """ 467 | html = "" 468 | for row in table: 469 | row_html = "" 470 | for col in row: 471 | row_html += "{:40}".format(str(col)) 472 | html += "" + row_html + "" 473 | html = "" + html + "
" 474 | IPython.display.display(IPython.display.HTML(html)) 475 | 476 | 477 | def display_weight_stats(model): 478 | """Scans all the weights in the model and returns a list of tuples 479 | that contain stats about each weight. 480 | """ 481 | layers = model.get_trainable_layers() 482 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 483 | for l in layers: 484 | weight_values = l.get_weights() # list of Numpy arrays 485 | weight_tensors = l.weights # list of TF tensors 486 | for i, w in enumerate(weight_values): 487 | weight_name = weight_tensors[i].name 488 | # Detect problematic layers. Exclude biases of conv layers. 489 | alert = "" 490 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 491 | alert += "*** dead?" 492 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 493 | alert += "*** Overflow?" 494 | # Add row 495 | table.append([ 496 | weight_name + alert, 497 | str(w.shape), 498 | "{:+9.4f}".format(w.min()), 499 | "{:+10.4f}".format(w.max()), 500 | "{:+9.4f}".format(w.std()), 501 | ]) 502 | display_table(table) 503 | -------------------------------------------------------------------------------- /pothole_base/pothole/pothole.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the toy pothole dataset and implement color splash effect. 4 | 5 | Copyright (c) 2018 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 pothole.py train --dataset=/path/to/pothole/dataset --weights=coco 16 | 17 | # Resume training a model that you had trained earlier 18 | python3 pothole.py train --dataset=/path/to/pothole/dataset --weights=last 19 | 20 | # Train a new model starting from ImageNet weights 21 | python3 pothole.py train --dataset=/path/to/pothole/dataset --weights=imagenet 22 | 23 | # Apply color splash to an image 24 | python3 pothole.py splash --weights=/path/to/weights/file.h5 --image= 25 | 26 | # Apply color splash to video using the last weights you trained 27 | python3 pothole.py splash --weights=last --video= 28 | """ 29 | 30 | import os 31 | import sys 32 | import json 33 | import datetime 34 | import numpy as np 35 | import skimage.draw 36 | 37 | # Root directory of the project 38 | ROOT_DIR = os.path.abspath("../../") 39 | 40 | # Import Mask RCNN 41 | sys.path.append(ROOT_DIR) # To find local version of the library 42 | from mrcnn.config import Config 43 | from mrcnn import model as modellib, utils 44 | 45 | # Path to trained weights file 46 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 47 | 48 | # Directory to save logs and model checkpoints, if not provided 49 | # through the command line argument --logs 50 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 51 | 52 | ############################################################ 53 | # Configurations 54 | ############################################################ 55 | 56 | 57 | class PotholeConfig(Config): 58 | """Configuration for training on the toy dataset. 59 | Derives from the base Config class and overrides some values. 60 | """ 61 | # Give the configuration a recognizable name 62 | NAME = "pothole" 63 | 64 | # We use a GPU with 12GB memory, which can fit two images. 65 | # Adjust down if you use a smaller GPU. 66 | IMAGES_PER_GPU = 1 67 | 68 | # Number of classes (including background) 69 | NUM_CLASSES = 1 + 1 # Background + pothole 70 | 71 | # Number of training steps per epoch 72 | STEPS_PER_EPOCH = 25 73 | 74 | # Skip detections with < 90% confidence 75 | DETECTION_MIN_CONFIDENCE = 0.9 76 | 77 | 78 | ############################################################ 79 | # Dataset 80 | ############################################################ 81 | 82 | class PotholeDataset(utils.Dataset): 83 | 84 | def load_pothole(self, dataset_dir, subset): 85 | """Load a subset of the pothole dataset. 86 | dataset_dir: Root directory of the dataset. 87 | subset: Subset to load: train or val 88 | """ 89 | # Add classes. We have only one class to add. 90 | self.add_class("pothole", 1, "pothole") 91 | 92 | # Train or validation dataset? 93 | assert subset in ["train", "val"] 94 | dataset_dir = os.path.join(dataset_dir, subset) 95 | 96 | # Load annotations 97 | # VGG Image Annotator saves each image in the form: 98 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 99 | # 'regions': { 100 | # '0': { 101 | # 'region_attributes': {}, 102 | # 'shape_attributes': { 103 | # 'all_points_x': [...], 104 | # 'all_points_y': [...], 105 | # 'name': 'polygon'}}, 106 | # ... more regions ... 107 | # }, 108 | # 'size': 100202 109 | # } 110 | # We mostly care about the x and y coordinates of each region 111 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 112 | annotations = list(annotations.values()) # don't need the dict keys 113 | 114 | # The VIA tool saves images in the JSON even if they don't have any 115 | # annotations. Skip unannotated images. 116 | annotations = [a for a in annotations if a['regions']] 117 | 118 | # Add images 119 | for a in annotations: 120 | # Get the x, y coordinaets of points of the polygons that make up 121 | # the outline of each object instance. There are stores in the 122 | # shape_attributes (see json format above) 123 | polygons = [r['shape_attributes'] for r in a['regions']] 124 | 125 | # load_mask() needs the image size to convert polygons to masks. 126 | # Unfortunately, VIA doesn't include it in JSON, so we must read 127 | # the image. This is only managable since the dataset is tiny. 128 | image_path = os.path.join(dataset_dir, a['filename']) 129 | image = skimage.io.imread(image_path) 130 | height, width = image.shape[:2] 131 | 132 | self.add_image( 133 | "pothole", 134 | image_id=a['filename'], # use file name as a unique image id 135 | path=image_path, 136 | width=width, height=height, 137 | polygons=polygons) 138 | 139 | def load_mask(self, image_id): 140 | """Generate instance masks for an image. 141 | Returns: 142 | masks: A bool array of shape [height, width, instance count] with 143 | one mask per instance. 144 | class_ids: a 1D array of class IDs of the instance masks. 145 | """ 146 | # If not a pothole dataset image, delegate to parent class. 147 | image_info = self.image_info[image_id] 148 | if image_info["source"] != "pothole": 149 | return super(self.__class__, self).load_mask(image_id) 150 | 151 | # Convert polygons to a bitmap mask of shape 152 | # [height, width, instance_count] 153 | info = self.image_info[image_id] 154 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 155 | dtype=np.uint8) 156 | for i, p in enumerate(info["polygons"]): 157 | # Get indexes of pixels inside the polygon and set them to 1 158 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 159 | mask[rr, cc, i] = 1 160 | 161 | # Return mask, and array of class IDs of each instance. Since we have 162 | # one class ID only, we return an array of 1s 163 | return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32) 164 | 165 | def image_reference(self, image_id): 166 | """Return the path of the image.""" 167 | info = self.image_info[image_id] 168 | if info["source"] == "pothole": 169 | return info["path"] 170 | else: 171 | super(self.__class__, self).image_reference(image_id) 172 | 173 | 174 | def train(model): 175 | """Train the model.""" 176 | # Training dataset. 177 | dataset_train = PotholeDataset() 178 | dataset_train.load_pothole(args.dataset, "train") 179 | dataset_train.prepare() 180 | 181 | # Validation dataset 182 | dataset_val = PotholeDataset() 183 | dataset_val.load_pothole(args.dataset, "val") 184 | dataset_val.prepare() 185 | 186 | # *** This training schedule is an example. Update to your needs *** 187 | # Since we're using a very small dataset, and starting from 188 | # COCO trained weights, we don't need to train too long. Also, 189 | # no need to train all layers, just the heads should do it. 190 | print("Training network heads") 191 | model.train(dataset_train, dataset_val, 192 | learning_rate=config.LEARNING_RATE, 193 | epochs=10, 194 | layers='heads') 195 | 196 | 197 | def color_splash(image, mask): 198 | """Apply color splash effect. 199 | image: RGB image [height, width, 3] 200 | mask: instance segmentation mask [height, width, instance count] 201 | 202 | Returns result image. 203 | """ 204 | # Make a grayscale copy of the image. The grayscale copy still 205 | # has 3 RGB channels, though. 206 | gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255 207 | # Copy color pixels from the original color image where mask is set 208 | if mask.shape[-1] > 0: 209 | # We're treating all instances as one, so collapse the mask into one layer 210 | mask = (np.sum(mask, -1, keepdims=True) >= 1) 211 | splash = np.where(mask, image, gray).astype(np.uint8) 212 | else: 213 | splash = gray.astype(np.uint8) 214 | return splash 215 | 216 | 217 | def detect_and_color_splash(model, image_path=None, video_path=None): 218 | assert image_path or video_path 219 | 220 | # Image or video? 221 | if image_path: 222 | # Run model detection and generate the color splash effect 223 | print("Running on {}".format(args.image)) 224 | # Read image 225 | image = skimage.io.imread(args.image) 226 | # Detect objects 227 | r = model.detect([image], verbose=1)[0] 228 | # Color splash 229 | splash = color_splash(image, r['masks']) 230 | # Save output 231 | file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now()) 232 | skimage.io.imsave(file_name, splash) 233 | elif video_path: 234 | import cv2 235 | # Video capture 236 | vcapture = cv2.VideoCapture(video_path) 237 | width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH)) 238 | height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 239 | fps = vcapture.get(cv2.CAP_PROP_FPS) 240 | 241 | # Define codec and create video writer 242 | file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now()) 243 | vwriter = cv2.VideoWriter(file_name, 244 | cv2.VideoWriter_fourcc(*'MJPG'), 245 | fps, (width, height)) 246 | 247 | count = 0 248 | success = True 249 | while success: 250 | print("frame: ", count) 251 | # Read next image 252 | success, image = vcapture.read() 253 | if success: 254 | # OpenCV returns images as BGR, convert to RGB 255 | image = image[..., ::-1] 256 | # Detect objects 257 | r = model.detect([image], verbose=0)[0] 258 | # Color splash 259 | splash = color_splash(image, r['masks']) 260 | # RGB -> BGR to save image to video 261 | splash = splash[..., ::-1] 262 | # Add image to video writer 263 | vwriter.write(splash) 264 | count += 1 265 | vwriter.release() 266 | print("Saved to ", file_name) 267 | 268 | 269 | ############################################################ 270 | # Training 271 | ############################################################ 272 | 273 | if __name__ == '__main__': 274 | import argparse 275 | 276 | # Parse command line arguments 277 | parser = argparse.ArgumentParser( 278 | description='Train Mask R-CNN to detect potholes.') 279 | parser.add_argument("command", 280 | metavar="", 281 | help="'train' or 'splash'") 282 | parser.add_argument('--dataset', required=False, 283 | metavar="/path/to/pothole/dataset/", 284 | help='Directory of the pothole dataset') 285 | parser.add_argument('--weights', required=True, 286 | metavar="/path/to/weights.h5", 287 | help="Path to weights .h5 file or 'coco'") 288 | parser.add_argument('--logs', required=False, 289 | default=DEFAULT_LOGS_DIR, 290 | metavar="/path/to/logs/", 291 | help='Logs and checkpoints directory (default=logs/)') 292 | parser.add_argument('--image', required=False, 293 | metavar="path or URL to image", 294 | help='Image to apply the color splash effect on') 295 | parser.add_argument('--video', required=False, 296 | metavar="path or URL to video", 297 | help='Video to apply the color splash effect on') 298 | args = parser.parse_args() 299 | 300 | # Validate arguments 301 | if args.command == "train": 302 | assert args.dataset, "Argument --dataset is required for training" 303 | elif args.command == "splash": 304 | assert args.image or args.video,\ 305 | "Provide --image or --video to apply color splash" 306 | 307 | print("Weights: ", args.weights) 308 | print("Dataset: ", args.dataset) 309 | print("Logs: ", args.logs) 310 | 311 | # Configurations 312 | if args.command == "train": 313 | config = PotholeConfig() 314 | else: 315 | class InferenceConfig(PotholeConfig): 316 | # Set batch size to 1 since we'll be running inference on 317 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 318 | GPU_COUNT = 0 319 | IMAGES_PER_GPU = 0 320 | config = InferenceConfig() 321 | config.display() 322 | 323 | # Create model 324 | if args.command == "train": 325 | model = modellib.MaskRCNN(mode="training", config=config, 326 | model_dir=args.logs) 327 | else: 328 | model = modellib.MaskRCNN(mode="inference", config=config, 329 | model_dir=args.logs) 330 | 331 | # Select weights file to load 332 | if args.weights.lower() == "coco": 333 | weights_path = COCO_WEIGHTS_PATH 334 | # Download weights file 335 | if not os.path.exists(weights_path): 336 | utils.download_trained_weights(weights_path) 337 | elif args.weights.lower() == "last": 338 | # Find last trained weights 339 | weights_path = model.find_last() 340 | elif args.weights.lower() == "imagenet": 341 | # Start from ImageNet trained weights 342 | weights_path = model.get_imagenet_weights() 343 | else: 344 | weights_path = args.weights 345 | 346 | # Load weights 347 | print("Loading weights ", weights_path) 348 | if args.weights.lower() == "coco": 349 | # Exclude the last layers because they require a matching 350 | # number of classes 351 | model.load_weights(weights_path, by_name=True, exclude=[ 352 | "mrcnn_class_logits", "mrcnn_bbox_fc", 353 | "mrcnn_bbox", "mrcnn_mask"]) 354 | else: 355 | model.load_weights(weights_path, by_name=True) 356 | 357 | # Train or evaluate 358 | if args.command == "train": 359 | train(model) 360 | elif args.command == "splash": 361 | detect_and_color_splash(model, image_path=args.image, 362 | video_path=args.video) 363 | else: 364 | print("'{}' is not recognized. " 365 | "Use 'train' or 'splash'".format(args.command)) 366 | -------------------------------------------------------------------------------- /pothole_base/predictor.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import json 5 | import pickle 6 | import sys 7 | import signal 8 | import traceback 9 | import flask 10 | from flask import request 11 | 12 | #0v1# JC Sept 10, 2018 13 | 14 | #NOTES: 15 | #> See https://github.com/leongn/model_to_api/blob/master/container/sentiment_analysis/predictor.py 16 | #> https://machine-learning-company.nl/deploy-machine-learning-model-rest-api-using-aws/ 17 | 18 | prefix = '/opt/ml/' 19 | model_path = os.path.join(prefix, 'model') 20 | 21 | 22 | #Watch instances of flask 23 | #- swap to function over class 24 | #VNN=Video_NN_Service() 25 | from video_markup import flask_process_video 26 | 27 | 28 | # The flask app for serving predictions 29 | app = flask.Flask(__name__) 30 | 31 | @app.route('/ping', methods=['GET']) 32 | def ping(): 33 | """Determine if the container is working and healthy. In this sample container, we declare 34 | it healthy if we can load the model successfully.""" 35 | try: 36 | health=True #default ok 37 | except: 38 | health=None 39 | status = 200 if health else 404 40 | return flask.Response(response='\n', status=status, mimetype='application/json') 41 | 42 | @app.route('/invocations', methods=['POST']) 43 | def transformation(): 44 | # "input" 45 | # s3_bucket='' 46 | # s3_source_filename='' 47 | # live= 48 | 49 | input_json = request.get_json() 50 | print ("GIVEN RAW JSON: "+str(input_json)) 51 | input_json = input_json['input'] 52 | 53 | s3_source_filename=input_json.get('s3_source_filename') 54 | s3_bucket=input_json.get('s3_bucket','tests-road-damage') 55 | is_live=input_json.get('is_live',False) 56 | 57 | # Call service (run in background) 58 | if s3_source_filename: 59 | flask_process_video(s3_source_filename=s3_source_filename,s3_bucket=s3_bucket,is_live=is_live) 60 | 61 | 62 | # Transform predictions to JSON 63 | result = {'output': []} 64 | list_out = [] 65 | result['output'] = list_out 66 | result = json.dumps(result) 67 | return flask.Response(response=result, status=200, mimetype='application/json') 68 | 69 | @app.route('/debug', methods=['GET','POST']) 70 | def debug(): 71 | s3_source_filename='sagemaker/small_pot.mp4' 72 | s3_bucket='tests-road-damage' 73 | is_live=False 74 | 75 | flask_process_video(s3_source_filename=s3_source_filename,s3_bucket=s3_bucket,is_live=is_live) 76 | # VNN.process_video(source_filename=s3_source_filename,bucket_name=s3_bucket,is_live=is_live) 77 | 78 | # Transform predictions to JSON 79 | result = {'output': []} 80 | list_out = [] 81 | for label in predictions: 82 | row_format = {'label': label} 83 | list_out.append(row_format) 84 | result['output'] = list_out 85 | result = json.dumps(result) 86 | return flask.Response(response=result, status=200, mimetype='application/json') 87 | 88 | #debug# app.run(host="127.0.0.1",port=5000,debug=True) 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /pothole_base/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 3 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 4 | # gunicorn exits. 5 | # 6 | # The flask server is specified to be the app object in wsgi.py 7 | # 8 | # We set the following parameters: 9 | # 10 | # Parameter Environment Variable Default Value 11 | # --------- -------------------- ------------- 12 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 13 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 14 | 15 | 16 | # NOTS: 17 | #- Most people can skip this part as they will not have to modify the number of workers their server uses. But 18 | # in the case your model is big (say at least 2GB) you might want to modify the model_server_workers parameter 19 | #in the serve file. This parameter determines how many instances of the Gunicorn server will be started in parallel. 20 | 21 | from __future__ import print_function 22 | import multiprocessing 23 | import os 24 | import signal 25 | import subprocess 26 | import sys 27 | 28 | cpu_count = multiprocessing.cpu_count() 29 | cpu_count = 2 #translates to instances of server workers 30 | 31 | TIMEOUT=60 32 | TIMEOUT=60*60*10 #10 hours?! 33 | 34 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', TIMEOUT) 35 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 36 | 37 | def sigterm_handler(nginx_pid, gunicorn_pid): 38 | try: 39 | os.kill(nginx_pid, signal.SIGQUIT) 40 | except OSError: 41 | pass 42 | try: 43 | os.kill(gunicorn_pid, signal.SIGTERM) 44 | except OSError: 45 | pass 46 | 47 | sys.exit(0) 48 | 49 | def start_server(): 50 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 51 | 52 | 53 | # link the log streams to stdout/err so they will be logged to the container logs 54 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 55 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 56 | 57 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 58 | gunicorn = subprocess.Popen(['gunicorn', 59 | '--timeout', str(model_server_timeout), 60 | '-k', 'gevent', 61 | '-b', 'unix:/tmp/gunicorn.sock', 62 | '-w', str(model_server_workers), 63 | 'wsgi:app']) 64 | 65 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 66 | 67 | # If either subprocess exits, so do we. 68 | pids = set([nginx.pid, gunicorn.pid]) 69 | while True: 70 | pid, _ = os.wait() 71 | if pid in pids: 72 | break 73 | 74 | sigterm_handler(nginx.pid, gunicorn.pid) 75 | print('Inference server exiting') 76 | 77 | # The main routine just invokes the start function. 78 | 79 | if __name__ == '__main__': 80 | start_server() 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /pothole_base/settings.ini: -------------------------------------------------------------------------------- 1 | [aws] 2 | AWS_ACCESS_KEY_ID=YOUR AWS ACCESS KEY 3 | AWS_SECRET_ACCESS_KEY= YOUR AWS SECRET KEY 4 | -------------------------------------------------------------------------------- /pothole_base/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # A sample training component that trains a simple scikit-learn decision tree model. 4 | # This implementation works in File mode and makes no assumptions about the input file names. 5 | # Input is specified as CSV with a data point in each row and the labels in the first column. 6 | 7 | from __future__ import print_function 8 | 9 | import os 10 | import json 11 | import pickle 12 | import sys 13 | import traceback 14 | 15 | import pandas as pd 16 | 17 | from sklearn import tree 18 | 19 | # These are the paths to where SageMaker mounts interesting things in your container. 20 | 21 | prefix = '/opt/ml/' 22 | 23 | input_path = prefix + 'input/data' 24 | output_path = os.path.join(prefix, 'output') 25 | model_path = os.path.join(prefix, 'model') 26 | param_path = os.path.join(prefix, 'input/config/hyperparameters.json') 27 | 28 | # This algorithm has a single channel of input data called 'training'. Since we run in 29 | # File mode, the input files are copied to the directory specified here. 30 | channel_name='training' 31 | training_path = os.path.join(input_path, channel_name) 32 | 33 | # The function to execute the training. 34 | def train(): 35 | print('Starting the training.') 36 | try: 37 | # Read in any hyperparameters that the user passed with the training job 38 | with open(param_path, 'r') as tc: 39 | trainingParams = json.load(tc) 40 | 41 | # Take the set of files and read them all into a single pandas dataframe 42 | input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ] 43 | if len(input_files) == 0: 44 | raise ValueError(('There are no files in {}.\n' + 45 | 'This usually indicates that the channel ({}) was incorrectly specified,\n' + 46 | 'the data specification in S3 was incorrectly specified or the role specified\n' + 47 | 'does not have permission to access the data.').format(training_path, channel_name)) 48 | raw_data = [ pd.read_csv(file, header=None) for file in input_files ] 49 | train_data = pd.concat(raw_data) 50 | 51 | # labels are in the first column 52 | train_y = train_data.ix[:,0] 53 | train_X = train_data.ix[:,1:] 54 | 55 | # Here we only support a single hyperparameter. Note that hyperparameters are always passed in as 56 | # strings, so we need to do any necessary conversions. 57 | max_leaf_nodes = trainingParams.get('max_leaf_nodes', None) 58 | if max_leaf_nodes is not None: 59 | max_leaf_nodes = int(max_leaf_nodes) 60 | 61 | # Now use scikit-learn's decision tree classifier to train the model. 62 | clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes) 63 | clf = clf.fit(train_X, train_y) 64 | 65 | # save the model 66 | with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'w') as out: 67 | pickle.dump(clf, out) 68 | print('Training complete.') 69 | except Exception as e: 70 | # Write out an error file. This will be returned as the failureReason in the 71 | # DescribeTrainingJob result. 72 | trc = traceback.format_exc() 73 | with open(os.path.join(output_path, 'failure'), 'w') as s: 74 | s.write('Exception during training: ' + str(e) + '\n' + trc) 75 | # Printing this causes the exception to be in the training job logs, as well. 76 | print('Exception during training: ' + str(e) + '\n' + trc, file=sys.stderr) 77 | # A non-zero exit code causes the training job to be marked as Failed. 78 | sys.exit(255) 79 | 80 | if __name__ == '__main__': 81 | train() 82 | 83 | # A zero exit code causes the job to be marked a Succeeded. 84 | sys.exit(0) 85 | -------------------------------------------------------------------------------- /pothole_base/video_markup.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import sys 4 | import random 5 | import math 6 | import numpy as np 7 | import scipy.misc 8 | import cv2 9 | import json 10 | from moviepy.editor import VideoFileClip 11 | from IPython.display import HTML 12 | import logging 13 | 14 | #import matplotlib 15 | #import matplotlib.pyplot as plt 16 | #import matplotlib.patches as patches 17 | #import matplotlib.lines as lines 18 | from matplotlib.patches import Polygon 19 | import IPython.display 20 | import colorsys 21 | 22 | import boto3 23 | 24 | #import coco 25 | import csv 26 | from PIL import Image 27 | from pytesseract import * 28 | 29 | sys.path.insert(0,'./pothole') #Relative import 30 | import pothole 31 | from mrcnn import utils 32 | from mrcnn import visualize 33 | from mrcnn.visualize import display_images 34 | import mrcnn.model as modellib 35 | from mrcnn.model import log 36 | 37 | 38 | 39 | #0v1# JC Sept 9, 2018 Initial setup 40 | 41 | 42 | #FIX FOR MULTITHREADED? 43 | from keras import backend #See get_session() 44 | #>https://github.com/keras-team/keras/issues/2397 45 | #tf# import tensorflow as tf 46 | #tf# global graph,model 47 | #tf# graph = tf.get_default_graph() 48 | #tf# #K.clear_session() 49 | 50 | 51 | # Root directory of the project 52 | ROOT_DIR = os.getcwd() 53 | 54 | # Config ENV 55 | ####################### 56 | #import ConfigParser 57 | import configparser as ConfigParser 58 | #BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '.')) 59 | Config = ConfigParser.ConfigParser() 60 | Config.read(ROOT_DIR+"/settings.ini") 61 | AWS_ACCESS_KEY_ID=Config.get('aws','AWS_ACCESS_KEY_ID') 62 | AWS_SECRET_ACCESS_KEY=Config.get('aws','AWS_SECRET_ACCESS_KEY') 63 | os.environ['AWS_ACCESS_KEY_ID'] = AWS_ACCESS_KEY_ID 64 | os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_ACCESS_KEY 65 | 66 | 67 | # Directory to save logs and trained model 68 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 69 | 70 | # Path to trained weights file 71 | # Download this file and place in the root of your 72 | # project (See README file for details) 73 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "pothole/mask_rcnn_pothole_0005.h5") 74 | 75 | # Directory of images to run detection on 76 | IMAGE_DIR = os.path.join(ROOT_DIR, "images") 77 | config = pothole.PotholeConfig() 78 | 79 | TEMP_DIR=ROOT_DIR+"/temp" 80 | 81 | 82 | # Interface between Sagemaker caller/lambda and backend resources 83 | #- extends flask predictor.py (context request) 84 | #- implements pothole-detection.ipynb 85 | 86 | 87 | class InferenceConfig(config.__class__): 88 | # Set batch size to 1 since we'll be running inference on 89 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 90 | GPU_COUNT = 1 91 | IMAGES_PER_GPU = 1 92 | 93 | def process_image(image, RCNN_MODEL='',title="", figsize=(16, 16), ax=None): 94 | #> pass model through lambda call 95 | # NOTE: The output you return should be a color image (3 channel) for processing video below 96 | # you should return the final output (image with lines are drawn on lanes 97 | #results = VNN.RCNN_MODEL.detect([image], verbose=0) 98 | 99 | with backend.get_session().graph.as_default() as g: 100 | #model = load_model(MODEL_PATH) 101 | results = RCNN_MODEL.detect([image], verbose=0) #Throws error if threaded model 102 | r = results[0] 103 | 104 | boxes = r['rois'] 105 | class_ids = r['class_ids'] 106 | scores = r['scores'] 107 | 108 | N = boxes.shape[0] 109 | 110 | # Show area outside image boundaries. 111 | font = cv2.FONT_HERSHEY_DUPLEX 112 | 113 | for i in range(N): 114 | class_id = class_ids[i] 115 | score = scores[i] if scores is not None else None 116 | label = 'pothole' 117 | 118 | y1, x1, y2, x2 = boxes[i] 119 | #cv2.rectangle(frame, (face_rect.left(),face_rect.top()), (face_rect.right(), face_rect.bottom()), (255,0,0), 3) 120 | cv2.rectangle(image, (x1, y1), (x2, y2), (255,0,0), 3) 121 | 122 | x = random.randint(x1, (x1 + x2) // 2) 123 | caption = "{} {:.3f}".format(label, score) if score else label 124 | cv2.putText(image, caption, (x1 + 6, y2 - 6), font, 0.5, (255, 255, 255), 1) 125 | 126 | im = Image.fromarray(image) 127 | text = image_to_string(im) 128 | data = GetCoordinates.GetDMS(text) 129 | height = y2-y1 130 | width = x2-x1 131 | area = width * height #in pixels, we don't know height and width of known reference to calculate pixels/inch 132 | if data: 133 | row = 'pothole' + ',' + str(area) + ',' + str(data) 134 | rows.append(row) 135 | return image 136 | 137 | 138 | 139 | def test_s3(): 140 | branches=['upload'] 141 | 142 | #Recall S3_Interface 143 | bucket_name='tests-road-damage' 144 | s3 = boto3.client( 's3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) 145 | 146 | if 'upload' in branches: 147 | local_filename="small_pot.mp4" 148 | target_filename="sagemaker/"+local_filename 149 | mb_size = os.path.getsize(local_filename) / 1e6 150 | print ("Uploading "+local_filename+" size: "+str(mb_size)+" MB...") 151 | s3.upload_file(local_filename, bucket_name, target_filename) 152 | 153 | if 'download' in branches: 154 | local_filename="delthis" 155 | source_filename="sagemaker/sage_handler.py" 156 | response = s3.get_object(Bucket=bucket_name,Key=source_filename) 157 | file_contents = response['Body'].read() 158 | print ("Loaded: "+str(file_contents)) 159 | 160 | print ("DONE test_s3") 161 | return 162 | 163 | def clip_movie(): 164 | #https://zulko.github.io/moviepy/getting_started/videoclips.html 165 | filename='C:/scripts-18/sagemaker/pothole-master/samples/pothole/datasets/pothole/potholes_42_miles.mp4' 166 | clip1 = VideoFileClip(filename).subclip(55,58) 167 | clip1.write_videofile("small_pot.mp4") # the gif will have 30 fps 168 | return 169 | 170 | 171 | 172 | 173 | class Video_NN_Service(object): 174 | def __init__(self): 175 | return 176 | 177 | def initialize(self): 178 | #GLOBAL MODEL 179 | config = InferenceConfig() 180 | # Create model object in inference mode. 181 | self.RCNN_MODEL = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config) 182 | # Load weights trained on MS-COCO 183 | self.RCNN_MODEL.load_weights(COCO_MODEL_PATH, by_name=True) 184 | logging.info("Loaded weights...") 185 | return 186 | 187 | def _fetch_filenames(self,source_filename): 188 | global TEMP_DIR 189 | if not os.path.exists(TEMP_DIR):hard_fail=no_temp 190 | 191 | target_s3_filename=re.sub(r'\.(.{2,5})$',r'_output.\1',source_filename) 192 | temp_input_filename=TEMP_DIR+'/'+re.sub(r'.*\/','',source_filename) 193 | 194 | temp_output_filename=re.sub(r'\.(.{2,5})$',r'_output.\1',source_filename) 195 | temp_output_filename=TEMP_DIR+"/"+re.sub(r'.*\/','',temp_output_filename) 196 | 197 | return target_s3_filename,temp_input_filename,temp_output_filename 198 | 199 | def process_video(self,source_filename="sagemaker/small_pot.mp4",bucket_name='tests-road-damage',is_live=False): 200 | #Generic flow 201 | 202 | #Fetch and validate filename 203 | target_s3_filename,temp_input_filename,temp_output_filename=self._fetch_filenames(source_filename) 204 | 205 | self.s32temp(source_filename,temp_input_filename,bucket_name) 206 | 207 | if not is_live: #debug_force_short_clip: 208 | clip1 = VideoFileClip(temp_input_filename).subclip(0,0.01) #read() -- alternatively, via url 209 | else: 210 | clip1 = VideoFileClip(temp_input_filename) 211 | 212 | print ("loaded video clip duration: "+str(clip1.duration)) 213 | print ("Writing video to temp: "+temp_output_filename) 214 | 215 | #white_clip = clip1.fl_image(process_image) #NOTE: this function expects color images!!s 216 | white_clip = clip1.fl_image(lambda image: process_image(image,RCNN_MODEL=self.RCNN_MODEL)) #NOTE: this function expects color images!!s 217 | white_clip.write_videofile(temp_output_filename, audio=False, bitrate="5000k") 218 | 219 | print ("Upload results to s3: "+target_s3_filename) 220 | self.temp2s3(temp_output_filename,target_s3_filename,bucket_name) 221 | 222 | if False: 223 | try: os.remove(temp_input_filename) 224 | except:pass 225 | try: os.remove(temp_output_filename) 226 | except:pass 227 | return 228 | 229 | def ping(self): 230 | return True 231 | def shutdown(self): 232 | #[] release model/memory 233 | return 234 | 235 | def s32temp(self,source_filename,temp_filename,bucket_name,use_cache=True): 236 | #Ideally not "in mem" 237 | #> VideoFileClip requires filename (otherwise url) -- but no file objects 238 | if not os.path.exists(temp_filename) or not use_cache: 239 | s3 = boto3.client( 's3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) 240 | print ("Downloading video clip: "+str(source_filename)) 241 | video_source=s3.get_object(Bucket=bucket_name,Key=source_filename)['Body'] #StreamingBody then #.read() for file byte string 242 | #Expects filename# self.reader = FFMPEG_VideoReader(filename, pix_fmt=pix_fmt, 243 | fp=open(temp_filename,'wb') 244 | fp.write(video_source.read()) 245 | fp.close() 246 | return 247 | 248 | def temp2s3(self,local_filename,target_filename,bucket_name): 249 | s3 = boto3.client( 's3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) 250 | mb_size = os.path.getsize(local_filename) / 1e6 251 | print ("Uploading "+local_filename+" size: "+str(mb_size)+" MB...") 252 | s3.upload_file(local_filename, bucket_name, target_filename) 253 | return 254 | 255 | 256 | 257 | #Move out here because of multithreads 258 | 259 | VNN=Video_NN_Service() 260 | VNN.initialize() 261 | 262 | def flask_process_video(s3_source_filename='',s3_bucket='',is_live=False): 263 | global VNN 264 | VNN.process_video(source_filename=s3_source_filename,bucket_name=s3_bucket,is_live=is_live) 265 | return 266 | 267 | 268 | def run_process_video(source_filename="sagemaker/small_pot.mp4"): 269 | global VNN 270 | VNN=Video_NN_Service() 271 | VNN.initialize() 272 | VNN.process_video(source_filename=source_filename) 273 | VNN.shutdown() 274 | return 275 | 276 | def test(): 277 | dd='{"input": {"s3_source_filename" : "/sagemaker/small_pot.mp4", "s3_bucket" : "tests-road-damange", "is_live": false} }' 278 | dd=json.loads(dd) 279 | input_json = dd['input'] 280 | print ("FO: "+str(input_json)) 281 | 282 | return 283 | 284 | if __name__=='__main__': 285 | branches=['clip_movie'] 286 | branches=['test_s3'] 287 | branches=['test'] 288 | branches=['run_process_video'] 289 | for b in branches: 290 | globals()[b]() 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | -------------------------------------------------------------------------------- /pothole_base/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | --------------------------------------------------------------------------------