├── .dockerignore ├── .ebextensions ├── download_models.config └── increase_timeout.config ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── Dockerfile.tests ├── Dockerrun.aws.json ├── LICENSE ├── Makefile ├── README.md ├── docs ├── .gitkeep ├── cat_list.txt ├── example_payload_multiple.json ├── example_payload_one.json ├── index.html ├── kwd_list.txt ├── multiple_response.json └── one_response.json ├── models ├── .gitkeep └── test │ ├── cat_raw2lemma.json │ ├── categories │ └── models │ │ ├── chemistry and materials.pkl │ │ ├── engineering.pkl │ │ ├── geosciences.pkl │ │ ├── life sciences.pkl │ │ ├── physics.pkl │ │ └── space sciences.pkl │ ├── config.yml │ ├── feature_matrix.jbl │ ├── keywords │ └── models │ │ └── topic_ │ │ └── finite element method.pkl │ ├── kwd_raw2lemma.json │ ├── test_inds.npy │ ├── train_inds.npy │ └── vectorizer.jbl ├── notebook └── .gitignore ├── requirements.txt ├── service ├── .dockerignore ├── .gitignore ├── __init__.py ├── app.py ├── config │ └── default_config.py ├── static │ ├── media │ │ ├── NASA_logo.png │ │ └── ajax-loader.gif │ └── style │ │ └── style.css └── templates │ └── home.html ├── src └── get_tag_names.py ├── tests ├── .gitignore ├── __init__.py ├── context.py └── test_app.py └── version.py /.dockerignore: -------------------------------------------------------------------------------- 1 | notebook/* 2 | data/* 3 | models/* 4 | scratch/* 5 | bert_models/* 6 | __pycache__/* 7 | docs/* 8 | new_data/* 9 | data2/* 10 | in_data/* 11 | out_data/* 12 | venv/* 13 | -------------------------------------------------------------------------------- /.ebextensions/download_models.config: -------------------------------------------------------------------------------- 1 | sources: 2 | /var/app/current/models/10_23_2019: https://data.nasa.gov/docs/datasets/public/concept_tagging_models/10_23_2019.zip 3 | -------------------------------------------------------------------------------- /.ebextensions/increase_timeout.config: 
-------------------------------------------------------------------------------- 1 | option_settings: 2 | - namespace: aws:elasticbeanstalk:command 3 | option_name: Timeout 4 | value: 3600 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | *.zip 3 | classifier_scripts/* 4 | classifier_scripts 5 | concept-tagging-training/* 6 | concept-tagging-training 7 | classifier_scripts-* 8 | *.txt 9 | !requirements.txt 10 | !docs/*.txt 11 | sti_tagger.zip 12 | *.h5 13 | service_bandit_analysis.txt 14 | .coverage 15 | cover/* 16 | scratch/* 17 | set_path.sh 18 | web.bfg-report 19 | venv/* 20 | .idea/* 21 | data/* 22 | __pycache__/* 23 | in_data/* 24 | out_data/* 25 | data2/* 26 | data/new_sti/* 27 | data/sti/* 28 | data/pmc/* 29 | bert_models/* 30 | bert_models/* 31 | models/* 32 | !models/test 33 | !models/.gitkeep 34 | __pycache__/* 35 | in_data/* 36 | new_data/* 37 | out_data/* 38 | topic_bert/* 39 | service/config/* 40 | !service/config/default_config.py 41 | !serivce/config/default_config.py 42 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | Releases page is here. 5 | 6 | ## [Unreleased] 7 | 8 | ## [v5.0.12] - 2020-06-16 9 | #### Fixed: 10 | Models are now loaded before the API starts. 11 | 12 | #### Added: 13 | Added option to preload the models or load them for each request. 14 | 15 | ## [v5.0.11] - 2020-06-16 16 | #### Fixed: 17 | Fixed various links to point to the open source code repositories instead of internal NASA repositories. 18 | 19 | #### Added: 20 | - Make command to download models from data.nasa.gov. 
21 | - .ebextensions command for downloading models for elastic beanstalk deployment 22 | 23 | 24 | ## [v5.0.9-open_source_release] - 2020-06-10 25 | #### Added: 26 | Original open-source release of this repository on github.com/nasa after having received SRA (software release authority) approval. 27 | 28 | 29 | 30 | # Guidelines for ChangeLog Entries 31 | 32 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 33 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 34 | 35 | ### Guiding Principles 36 | - Changelogs are for humans, not machines. 37 | - There should be an entry for every single version. 38 | - The same types of changes should be grouped. 39 | - Versions and sections should be linkable. 40 | - The latest version comes first. 41 | - The release date of each version is displayed. 42 | 43 | ### All Entries Should Be Under One of These Types of Changes 44 | - Added for new features. 45 | - Changed for changes in existing functionality. 46 | - Deprecated for soon-to-be removed features. 47 | - Removed for now removed features. 48 | - Fixed for any bug fixes. 49 | - Security in case of vulnerabilities. 
50 | 51 | Google technical writer Sarah Maddox gave the following advice about release notes: 52 | `“The most important function of release notes is to let customers know that something has changed in the product, particularly when that something may affect the way the customer uses the product.”` 53 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | # Install dependencies 4 | USER root 5 | COPY requirements.txt /home/requirements.txt 6 | RUN pip install -r /home/requirements.txt 7 | ARG cachebust=2 8 | RUN pip install git+https://github.com/nasa/concept-tagging-training.git@v1.0.3-open_source_release 9 | RUN python -m spacy download en_core_web_sm 10 | 11 | # Include src and set permissions 12 | RUN mkdir /home/service 13 | COPY service home/service 14 | WORKDIR home/ 15 | 16 | # Label image with git commit url 17 | ARG GIT_URL=unspecified 18 | ARG VERSION=unspecified 19 | LABEL org.label-schema.schema-version=1.0 20 | LABEL org.label-schema.url=$GIT_URL 21 | LABEL org.label-schema.version=$VERSION 22 | ENV VERSION=$VERSION 23 | ENV MODELS_DIR=models/experiment 24 | 25 | # Set service to a user 26 | RUN groupadd -g 999 appuser && \ 27 | useradd -r -u 999 -g appuser appuser 28 | RUN chown -R 999:999 /home/service 29 | USER appuser 30 | WORKDIR /home/service 31 | 32 | ENV PRELOAD=True 33 | # Change PRELOAD to False to load models for each request. 34 | # Saves RAM with little effect on bulk requests 35 | # but a large effect on small requests. 
36 | EXPOSE 5000 37 | ENTRYPOINT ["gunicorn", "app:app", "-b", " 0.0.0.0:5000"] 38 | CMD ["--timeout", "1200"] 39 | -------------------------------------------------------------------------------- /Dockerfile.tests: -------------------------------------------------------------------------------- 1 | FROM python:3.7 as base 2 | 3 | # Install dependencies 4 | USER root 5 | COPY requirements.txt /home/requirements.txt 6 | RUN pip install -r /home/requirements.txt 7 | ARG cachebust=2 8 | RUN pip install git+https://github.com/nasa/concept-tagging-training.git@v1.0.3-open_source_release 9 | RUN python -m spacy download en_core_web_sm 10 | 11 | # Include src and set permissions 12 | RUN mkdir /home/service 13 | COPY service home/service 14 | WORKDIR home/ 15 | 16 | # Label image with git commit url 17 | ARG GIT_URL=unspecified 18 | ARG VERSION=unspecified 19 | LABEL org.label-schema.schema-version=1.0 20 | LABEL org.label-schema.url=$GIT_URL 21 | LABEL org.label-schema.version=$VERSION 22 | ENV VERSION=$VERSION 23 | ENV MODELS_DIR=models/experiment 24 | 25 | # Run nose coverage tests in the image 26 | FROM base as tests 27 | RUN pip install nose && \ 28 | pip install coverage 29 | COPY tests /home/tests 30 | ARG cachebust=0 31 | RUN nosetests --with-coverage --cover-package service 32 | 33 | # Set service to a user 34 | FROM base as service 35 | RUN groupadd -g 999 appuser && \ 36 | useradd -r -u 999 -g appuser appuser 37 | RUN chown -R 999:999 /home/service 38 | USER appuser 39 | WORKDIR /home/service 40 | 41 | EXPOSE 5000 42 | ENTRYPOINT ["gunicorn", "app:app", "-b", " 0.0.0.0:5000"] 43 | CMD ["--timeout", "1200"] 44 | -------------------------------------------------------------------------------- /Dockerrun.aws.json: -------------------------------------------------------------------------------- 1 | { 2 | "AWSEBDockerrunVersion": "1", 3 | "Image": { 4 | "Name": "STI_tagger_v5_1" 5 | }, 6 | "Ports": [ 7 | { 8 | "ContainerPort": 5000, 9 | "HostPort": 5000 10 | } 11 | 
], 12 | "Volumes": [ 13 | { 14 | "HostDirectory": "/var/app/current/models/10_23_2019", 15 | "ContainerDirectory": "/home/service/models/experiment" 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | Copyright (c) 2020, United States Government as represented by the Administrator of the National Aeronautics and Space Administration. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: tests help service 2 | .DEFAULT_GOAL := help 3 | 4 | BUCKET=your-bucket-name/path/to/your/project/dir 5 | DOCKER_REGISTRY=your-docker-registry.com/your-namespace 6 | PROFILE=default 7 | IMAGE_NAME=concept_tagging_api 8 | GIT_REMOTE=origin 9 | MODELS_DIR=models 10 | EXPERIMENT_NAME=test 11 | DOCKERFILE_NAME=Dockerfile 12 | MODELS_URL=https://data.nasa.gov/docs/datasets/public/concept_tagging_models/10_23_2019.zip 13 | PRELOAD=True 14 | PORT=5001 15 | 16 | ## Install requirements to local python environment 17 | requirements: 18 | pip install -r requirements.txt; \ 19 | pip install git+https://github.com/nasa/concept-tagging-training.git@v1.0.3-open_source_release#egg=dsconcept \ 20 | python -m spacy download en_core_web_sm 21 | 22 | ## Run test coverage with nosetests 23 | tests: 24 | pip install nose; \ 25 | pip install coverage==4.5.4; \ 26 | export MODELS_DIR=models/test; \ 27 | nosetests --with-coverage --cover-package service --cover-html; \ 28 | open cover/index.html 29 | 30 | examples: 31 | @echo "curl -X POST -H "Content-Type: application/json" -d @example.json http://0.0.0.0:5005/findterms/" 32 | 33 | 34 | 35 | ## Build docker image for service, automatically labeling image with link to most recent commit. 36 | ## Choose which Dockerfile to use with DOCKERFILE_NAME variable. The default requires you have downloaded the concept_tagging_training library. 37 | ## Dockerfile.tests includes testing in the docker build process. 
38 | build: 39 | export COMMIT=$$(git log -1 --format=%H); \ 40 | export REPO_URL=$$(git remote get-url $(GIT_REMOTE)); \ 41 | export REPO_DIR=$$(dirname $$REPO_URL); \ 42 | export BASE_NAME=$$(basename $$REPO_URL .git); \ 43 | export GIT_LOC=$$REPO_DIR/$$BASE_NAME/tree/$$COMMIT; \ 44 | export VERSION=$$(python version.py); \ 45 | echo $$GIT_LOC; \ 46 | docker build -t $(IMAGE_NAME):$$VERSION \ 47 | -f $(DOCKERFILE_NAME) \ 48 | --build-arg GIT_URL=$$GIT_LOC \ 49 | --build-arg VERSION=$$VERSION . 50 | 51 | ## Push the docker image to storage.analytics.nasa.gov 52 | push: 53 | export VERSION=$$(python version.py); \ 54 | docker tag $(IMAGE_NAME):$$VERSION $(DOCKER_REGISTRY)/$(IMAGE_NAME):$$VERSION; \ 55 | docker tag $(IMAGE_NAME):$$VERSION $(DOCKER_REGISTRY)/$(IMAGE_NAME):latest; \ 56 | docker push $(DOCKER_REGISTRY)/$(IMAGE_NAME):$$VERSION; \ 57 | docker push $(DOCKER_REGISTRY)/$(IMAGE_NAME):latest 58 | 59 | ## Push docker image to storage.analytics.nasa.gov as stable version 60 | push-stable: 61 | export VERSION=$$(python version.py); \ 62 | docker tag $(IMAGE_NAME):$$VERSION $(DOCKER_REGISTRY)/$(IMAGE_NAME):stable; \ 63 | docker push $(DOCKER_REGISTRY)/$(IMAGE_NAME):stable 64 | 65 | ## Run the service using docker 66 | service: 67 | @echo $(MODELS_DIR)/$(EXPERIMENT_NAME) 68 | export VERSION=$$(python version.py); \ 69 | docker run -it \ 70 | -p $(PORT):5000 \ 71 | -v $$(pwd)/$(MODELS_DIR)/$(EXPERIMENT_NAME):/home/service/models/experiment \ 72 | -e PRELOAD=$(PRELOAD) \ 73 | $(IMAGE_NAME):$$VERSION 74 | 75 | ## Run the service locally without docker 76 | service-local: 77 | export MODELS_DIR=$(MODELS_DIR)/$(EXPERIMENT_NAME); \ 78 | python service/app.py 79 | 80 | TXT_DIR=$(MODELS_DIR)/$(EXPERIMENT_NAME)/tags_txts/ 81 | ## Get txt files of keyword tags 82 | get-tag-names: 83 | mkdir -p $(TXT_DIR); \ 84 | python src/get_tag_names.py \ 85 | --model_dir $(MODELS_DIR)/$(EXPERIMENT_NAME) \ 86 | --text_dir $(TXT_DIR) 87 | 88 | ## sync models from s3 bucket 89 | 
sync_models_from_s3: 90 | ifeq (default,$(PROFILE)) 91 | aws s3 sync s3://$(BUCKET)models/$(EXPERIMENT_NAME) models/$(EXPERIMENT_NAME) 92 | else 93 | aws s3 sync s3://$(BUCKET)models/$(EXPERIMENT_NAME) models/$(EXPERIMENT_NAME) --profile $(PROFILE) 94 | endif 95 | 96 | 97 | ## Download models from data.nasa.gov 98 | get-models: 99 | wget -O models/models.zip $(MODELS_URL); \ 100 | cd models && unzip models.zip 101 | 102 | ## zip files for service docker image for deployment to Elastic Beanstalk 103 | zip-for-ebs: 104 | zip -r --include 'service/*' 'classifier_scripts/*' \ 105 | 'requirements.txt' '.dockerignore' 'Dockerfile' \ 106 | 'Dockerrun.aws.json' '.ebextensions/*' @ sti_tagger.zip . 107 | 108 | help: 109 | @echo "$$(tput bold)Available rules:$$(tput sgr0)" 110 | @echo 111 | @sed -n -e "/^## / { \ 112 | h; \ 113 | s/.*//; \ 114 | :doc" \ 115 | -e "H; \ 116 | n; \ 117 | s/^## //; \ 118 | t doc" \ 119 | -e "s/:.*//; \ 120 | G; \ 121 | s/\\n## /---/; \ 122 | s/\\n/ /g; \ 123 | p; \ 124 | }" ${MAKEFILE_LIST} \ 125 | | LC_ALL='C' sort --ignore-case \ 126 | | awk -F '---' \ 127 | -v ncol=$$(tput cols) \ 128 | -v indent=19 \ 129 | -v col_on="$$(tput setaf 6)" \ 130 | -v col_off="$$(tput sgr0)" \ 131 | '{ \ 132 | printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ 133 | n = split($$2, words, " "); \ 134 | line_length = ncol - indent; \ 135 | for (i = 1; i <= n; i++) { \ 136 | line_length -= length(words[i]) + 1; \ 137 | if (line_length <= 0) { \ 138 | line_length = ncol - indent - length(words[i]) - 1; \ 139 | printf "\n%*s ", -indent, " "; \ 140 | } \ 141 | printf "%s ", words[i]; \ 142 | } \ 143 | printf "\n"; \ 144 | }' \ 145 | | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OCIO STI Concept Tagging Service 2 | 3 | An API for exposing models 
created with [STI concept training](https://github.com/nasa/concept-tagging-training). This project was written about [here](https://strategy.data.gov/proof-points/2019/05/28/improving-data-access-and-data-management-artificial-intelligence-generated-metadata-tags-at-nasa/) for the Federal Data Strategy Incubator Project. A running version of this API may be found [here](http://go.nasa.gov/concepttagger); however, this is a temporary instance for demo purposes. It may not be available long-term. Please do not use it in production or at scale. 4 | 5 | ### What is Concept Tagging 6 | By concept tagging, we mean you can supply text, for example: 7 | `Volcanic activity, or volcanism, has played a significant role in the geologic evolution of Mars.[2] Scientists have known since the Mariner 9 mission in 1972 that volcanic features cover large portions of the Martian surface.` and get back predicted keywords, like `volcanology, mars surface, and structural properties`, as well as topics, like `space sciences, geosciences`, from a standardized list of several thousand NASA concepts with a probability score for each prediction. 8 | 9 | ## Index 10 | 1. [Using Endpoint](#using-endpoint) 11 | 1. [Request](#request) 12 | 2. [Response](#response) 13 | 2. [Running Your Own Instance](#running-your-own-instance) 14 | 1. [Installation](#installation) 15 | 1. [Pull Docker Image](#pull-docker-image) 16 | 2. [Build Docker Image](#build-docker-image) 17 | 3. [With Local Python](#with-local-python) 18 | 2. [Download Models](#downloading-models) 19 | 3. [Running Service](#running-service) 20 | 1. [Using Docker](#using-docker) 21 | 2. [Using Local Python](#using-local-python) 22 | 23 | ## Using Endpoint 24 | ### Request 25 | The endpoint accepts a few fields, shown in this example: 26 | ```json 27 | { 28 | "text": [ 29 | "Astronauts go on space walks.", 30 | "Basalt rocks and minerals are on earth." 
31 | ], 32 | "probability_threshold":"0.5", 33 | "topic_threshold":"0.9", 34 | "request_id":"example_id10" 35 | } 36 | ``` 37 | - **text** *(string or list of strings)* -- The text(s) to be tagged. 38 | - **probability_threshold** *(float in [0, 1])* -- A threshold under which a concept tag will not be returned by the API. For example, if the threshold is set to 0.8 and a concept only scores 0.5, the concept will be omitted from the response. Setting to 1 will yield no results. Setting to 0 will yield all of the classifiers and their scores, no matter how low. 39 | - **topic_threshold** *(float in [0, 1])* -- A probability threshold for categories. If a category falls under this threshold, its respective suite of models will not be utilized for prediction. If you set this value to 1, only the generalized concept models will be used for tagging, yielding significant speed gains. 40 | - **request_id** *(string)* -- An optional ID for your request. 41 | 42 | You might send this request using curl. In the command below: 43 | 1. Substitute `example_payload_multiple.json` with the path to your json request. 44 | 2. Substitute `http://0.0.0.0:5000/` with the address of the API instance. 45 | ``` 46 | curl -X POST -H "Content-Type: application/json" -d @example_payload_multiple.json http://0.0.0.0:5000/findterms/ 47 | ``` 48 | ### Response 49 | You will then receive a response like the one [here](docs/multiple_response.json). In the `payload`, you will see multiple fields, including: 50 | - **features** -- words and phrases directly extracted from the document. 51 | - **sti_keywords** -- concepts and their prediction scores. 52 | - **topic_probabilities** -- model scores for all of the categories. 53 | 54 | ## Running Your Own Instance 55 | ### Installation 56 | For most people, the simplest installation entails [building the docker image](#build-docker-image), [downloading the models](#downloading-models), and [running the docker container](#using-docker). 
57 | 58 | 59 | #### Build Docker Image 60 | First, clone this repository and enter its root. 61 | Now, you can build the image with: 62 | ``` 63 | docker build -t concept_tagging_api:example . 64 | ``` 65 | \* Developers should look at the `make build` command in the [Makefile](Makefile). It has an automated process for tagging the image with useful metadata. 66 | 67 | #### With Local Python 68 | \* tested with python:3.7 69 | First, clone this repository and enter its root. 70 | Now, create a virtual environment. For example, using [venv](https://docs.python.org/3/library/venv.html): 71 | ``` 72 | python -m venv venv 73 | source venv/bin/activate 74 | ``` 75 | Now install the requirements with: 76 | ``` 77 | make requirements 78 | ``` 79 | 80 | ### Downloading Models 81 | Then, you need to download the machine learning models upon which the service relies. 82 | 83 | You can find a zipped file that contains all of the models [here](https://data.nasa.gov/docs/datasets/public/concept_tagging_models/10_23_2019.zip). Now, to get the models in the right place and unzip them (assuming the downloaded file is named `10_23_2019.zip`): 84 | ```bash 85 | mkdir models 86 | mv 10_23_2019.zip models 87 | cd models 88 | unzip 10_23_2019.zip 89 | ``` 90 | Alternatively, the models can also be downloaded from data.nasa.gov where they are named STI Tagging Models. However, they download more slowly from that location. 91 | 92 | ### Running Service 93 | 94 | #### Using Docker 95 | With the docker image and model files in place, you can now run the service with a simple docker command. In the command below, be sure to: 96 | 1. Substitute `concept_tagging_api:example` with the name of your image. 97 | 2. Substitute `$(pwd)/models/10_23_2019` with the path to your models directory. 98 | 3. Substitute `5001` with the port on your local machine from which you wish to access the API. 
99 | ``` 100 | docker run -it \ 101 | -p 5001:5000 \ 102 | -v $(pwd)/models/10_23_2019:/home/service/models/experiment \ 103 | concept_tagging_api:example 104 | ``` 105 | 106 | Note that you may experience permission errors when you start the container. To resolve this issue, set the user and group of your `models` directory to 999. This is the uid and gid of the `appuser` account that runs the service inside the container. 107 | 108 | **optional** 109 | The entrypoint to the docker image is [gunicorn](https://docs.gunicorn.org/en/stable/index.html), a python WSGI HTTP Server which runs our flask app. You can optionally pass additional arguments to gunicorn. For example: 110 | ```bash 111 | docker run -it \ 112 | -p 5001:5000 \ 113 | -v $(pwd)/models/10_23_2019:/home/service/models/experiment \ 114 | concept_tagging_api:example --timeout 9000 115 | ``` 116 | See [here](https://docs.gunicorn.org/en/stable/design.html#async-workers) for more information about design considerations for these gunicorn settings. 117 | 118 | #### Pitfalls & Gotchas to Remember 119 | - If you run this on a cloud service and run an upgrade on everything out of date for security reasons, you may need to run `sudo service docker stop` 120 | and then `sudo service docker start` to get docker going again. You'll also have to find the docker container that you had last running and restart it. 121 | - If you run the docker container as described above, remember to try the URL of your service with the proper port at the end of the URL. 122 | 123 | #### Using Local Python 124 | With the requirements installed and the model files in place, you can now run the service with python locally. 125 | In the command below, substitute `models/test` with the path to your models directory. For example, if you followed the example from [Downloading Models](#downloading-models), it will be `models/10_23_2019`. 
126 | ``` 127 | export MODELS_DIR=models/test; \ 128 | python service/app.py 129 | ``` 130 | #### If you were a part of the legacy concept tagger api development team and need access to test server that's no longer available as of 11/9/23, please email us [here](mailto:hq-open-innovation@mail.nasa.gov). 131 | -------------------------------------------------------------------------------- /docs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/docs/.gitkeep -------------------------------------------------------------------------------- /docs/cat_list.txt: -------------------------------------------------------------------------------- 1 | astronautics 2 | life sciences 3 | geosciences 4 | social and information sciences 5 | mathematical and computer sciences 6 | space sciences 7 | general 8 | physics 9 | engineering 10 | chemistry and materials 11 | aeronautics 12 | -------------------------------------------------------------------------------- /docs/example_payload_multiple.json: -------------------------------------------------------------------------------- 1 | { 2 | "text": [ 3 | "Astronauts go on space walks.", 4 | "Basalt rocks and minerals are on earth." 5 | ], 6 | "probability_threshold":"0.6", 7 | "topic_threshold":"0.9", 8 | "request_id":"example_id10" 9 | } 10 | -------------------------------------------------------------------------------- /docs/example_payload_one.json: -------------------------------------------------------------------------------- 1 | { 2 | "text": "Volcanic activity, or volcanism, has played a significant role in the geologic evolution of Mars.[2] Scientists have known since the Mariner 9 mission in 1972 that volcanic features cover large portions of the Martian surface. 
These features include extensive lava flows, vast lava plains, and the largest known volcanoes in the Solar System.[3][4] Martian volcanic features range in age from Noachian (>3.7 billion years) to late Amazonian (< 500 million years), indicating that the planet has been volcanically active throughout its history,[5] and some speculate it probably still is so today.[6][7] Both Earth and Mars are large, differentiated planets built from similar chondritic materials.[8] Many of the same magmatic processes that occur on Earth also occurred on Mars, and both planets are similar enough compositionally that the same names can be applied to their igneous rocks and minerals.", 3 | "probability_threshold":"0.5", 4 | "topic_threshold":"0.9", 5 | "request_id":"example_id10" 6 | } 7 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | Hello. This is a test. 2 | -------------------------------------------------------------------------------- /docs/multiple_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "code": 200, 3 | "interface_version": "2.0.0", 4 | "messages": [], 5 | "payload": { 6 | "features": [ 7 | { 8 | "astronaut": "NOUN", 9 | "space": "NOUN", 10 | "space walk": "NOUN_CHUNK", 11 | "walk": "NOUN" 12 | }, 13 | { 14 | "basalt": "NOUN", 15 | "basalt rock": "NOUN_CHUNK", 16 | "earth": "NOUN", 17 | "mineral": "NOUN", 18 | "rock": "NOUN" 19 | } 20 | ], 21 | "probability_threshold": 0.6, 22 | "request_id": "example_id10", 23 | "sti_keywords": [ 24 | [ 25 | { 26 | "keyword": "astronaut", 27 | "probability": 0.668371856212616, 28 | "unstemmed": "ASTRONAUTS" 29 | } 30 | ], 31 | [ 32 | { 33 | "keyword": "basalt", 34 | "probability": 0.7939732074737549, 35 | "unstemmed": "BASALT" 36 | }, 37 | { 38 | "keyword": "rock", 39 | "probability": 0.7780118584632874, 40 | "unstemmed": "ROCKS" 41 | }, 
42 | { 43 | "keyword": "earth (planet)", 44 | "probability": 0.7754494547843933, 45 | "unstemmed": "EARTH (PLANET)" 46 | }, 47 | { 48 | "keyword": "mineral", 49 | "probability": 0.6153970956802368, 50 | "unstemmed": "MINERALS" 51 | } 52 | ] 53 | ], 54 | "topic_probabilities": [ 55 | [ 56 | { 57 | "keyword": "engineering", 58 | "probability": 0.3070921892559855, 59 | "unstemmed": "engineering" 60 | }, 61 | { 62 | "keyword": "astronautic", 63 | "probability": 0.25538644588380033, 64 | "unstemmed": "astronautics" 65 | }, 66 | { 67 | "keyword": "mathematical and computer science", 68 | "probability": 0.19811001658035526, 69 | "unstemmed": "mathematical and computer sciences" 70 | }, 71 | { 72 | "keyword": "physic", 73 | "probability": 0.1562576839880182, 74 | "unstemmed": "physics" 75 | }, 76 | { 77 | "keyword": "general", 78 | "probability": 0.15577113335853618, 79 | "unstemmed": "general" 80 | }, 81 | { 82 | "keyword": "geoscience", 83 | "probability": 0.12943611162195437, 84 | "unstemmed": "geosciences" 85 | }, 86 | { 87 | "keyword": "space science", 88 | "probability": 0.11847130918128303, 89 | "unstemmed": "space sciences" 90 | }, 91 | { 92 | "keyword": "life science", 93 | "probability": 0.09196319125679275, 94 | "unstemmed": "life sciences" 95 | }, 96 | { 97 | "keyword": "chemistry and material", 98 | "probability": 0.06369718485370329, 99 | "unstemmed": "chemistry and materials" 100 | }, 101 | { 102 | "keyword": "social and information science", 103 | "probability": 0.03550870993708698, 104 | "unstemmed": "social and information sciences" 105 | }, 106 | { 107 | "keyword": "aeronautic", 108 | "probability": 0.0198768734839847, 109 | "unstemmed": "aeronautics" 110 | } 111 | ], 112 | [ 113 | { 114 | "keyword": "geoscience", 115 | "probability": 0.8045973818316864, 116 | "unstemmed": "geosciences" 117 | }, 118 | { 119 | "keyword": "space science", 120 | "probability": 0.3025021186613736, 121 | "unstemmed": "space sciences" 122 | }, 123 | { 124 | "keyword": 
"engineering", 125 | "probability": 0.17311155904614617, 126 | "unstemmed": "engineering" 127 | }, 128 | { 129 | "keyword": "astronautic", 130 | "probability": 0.10161027467299147, 131 | "unstemmed": "astronautics" 132 | }, 133 | { 134 | "keyword": "mathematical and computer science", 135 | "probability": 0.07255095295187919, 136 | "unstemmed": "mathematical and computer sciences" 137 | }, 138 | { 139 | "keyword": "physic", 140 | "probability": 0.052449295673869516, 141 | "unstemmed": "physics" 142 | }, 143 | { 144 | "keyword": "general", 145 | "probability": 0.03884082731899018, 146 | "unstemmed": "general" 147 | }, 148 | { 149 | "keyword": "life science", 150 | "probability": 0.02371088825411408, 151 | "unstemmed": "life sciences" 152 | }, 153 | { 154 | "keyword": "aeronautic", 155 | "probability": 0.021397658355553247, 156 | "unstemmed": "aeronautics" 157 | }, 158 | { 159 | "keyword": "chemistry and material", 160 | "probability": 0.02060000507649233, 161 | "unstemmed": "chemistry and materials" 162 | }, 163 | { 164 | "keyword": "social and information science", 165 | "probability": 0.012841303426494808, 166 | "unstemmed": "social and information sciences" 167 | } 168 | ] 169 | ], 170 | "topic_threshold": 0.9 171 | }, 172 | "service_version": "unspecified", 173 | "status": "okay" 174 | } 175 | -------------------------------------------------------------------------------- /docs/one_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "code": 200, 3 | "interface_version": "2.0.0", 4 | "messages": [], 5 | "payload": { 6 | "features": [ 7 | { 8 | "3.7 billion year": "ENT", 9 | "500 million year": "ENT", 10 | "< 500 million year": "NOUN_CHUNK", 11 | "> 3.7 billion year": "NOUN_CHUNK", 12 | "Amazonian": "PROPN", 13 | "Earth": "PROPN", 14 | "Mariner": "PROPN", 15 | "Mariner 9 mission": "NOUN_CHUNK", 16 | "Mars": "PROPN", 17 | "Mars.[2": "PROPN", 18 | "Noachian": "PROPN", 19 | "activity": "NOUN", 20 | "age": "NOUN", 
21 | "evolution": "NOUN", 22 | "extensive lava flow": "NOUN_CHUNK", 23 | "feature": "NOUN", 24 | "flow": "NOUN", 25 | "geologic evolution": "NOUN_CHUNK", 26 | "history,[5": "PROPN", 27 | "igneous rock": "NOUN_CHUNK", 28 | "large , differentiated planet": "NOUN_CHUNK", 29 | "large know volcano": "NOUN_CHUNK", 30 | "large portion": "NOUN_CHUNK", 31 | "late Amazonian": "NOUN_CHUNK", 32 | "lava": "NOUN", 33 | "magmatic process": "NOUN_CHUNK", 34 | "martian": "ENT", 35 | "martian surface": "NOUN_CHUNK", 36 | "materials.[8": "PROPN", 37 | "mineral": "NOUN", 38 | "mission": "NOUN", 39 | "name": "NOUN", 40 | "plain": "NOUN", 41 | "planet": "NOUN", 42 | "portion": "NOUN", 43 | "process": "NOUN", 44 | "rock": "NOUN", 45 | "role": "NOUN", 46 | "scientist": "NOUN", 47 | "significant role": "NOUN_CHUNK", 48 | "similar chondritic materials.[8": "NOUN_CHUNK", 49 | "solar system.[3][4": "NOUN_CHUNK", 50 | "surface": "NOUN", 51 | "system.[3][4": "NOUN", 52 | "today.[6][7": "PROPN", 53 | "vast lava plain": "NOUN_CHUNK", 54 | "volcanic activity": "NOUN_CHUNK", 55 | "volcanic feature": "NOUN_CHUNK", 56 | "volcanism": "NOUN", 57 | "volcano": "NOUN", 58 | "year": "NOUN" 59 | } 60 | ], 61 | "probability_threshold": 0.5, 62 | "request_id": "example_id10", 63 | "sti_keywords": [ 64 | [ 65 | { 66 | "keyword": "lava", 67 | "probability": 0.988079309463501, 68 | "unstemmed": "LAVA" 69 | }, 70 | { 71 | "keyword": "volcanology", 72 | "probability": 0.9851651191711426, 73 | "unstemmed": "VOLCANOLOGY" 74 | }, 75 | { 76 | "keyword": "volcano", 77 | "probability": 0.9818959832191467, 78 | "unstemmed": "VOLCANOES" 79 | }, 80 | { 81 | "keyword": "mars surface", 82 | "probability": 0.8633611798286438, 83 | "unstemmed": "MARS SURFACE" 84 | }, 85 | { 86 | "keyword": "planetary evolution", 87 | "probability": 0.8472112417221069, 88 | "unstemmed": "PLANETARY EVOLUTION" 89 | }, 90 | { 91 | "keyword": "igneous rock", 92 | "probability": 0.8400911092758179, 93 | "unstemmed": "IGNEOUS ROCKS" 94 | }, 95 | { 96 
| "keyword": "planetary geology", 97 | "probability": 0.8080313205718994, 98 | "unstemmed": "PLANETARY GEOLOGY" 99 | }, 100 | { 101 | "keyword": "mars volcanoe", 102 | "probability": 0.7860469818115234, 103 | "unstemmed": "MARS VOLCANOES" 104 | }, 105 | { 106 | "keyword": "rock", 107 | "probability": 0.7769719362258911, 108 | "unstemmed": "ROCKS" 109 | }, 110 | { 111 | "keyword": "geochronology", 112 | "probability": 0.672389805316925, 113 | "unstemmed": "GEOCHRONOLOGY" 114 | }, 115 | { 116 | "keyword": "planetary surface", 117 | "probability": 0.6425392627716064, 118 | "unstemmed": "PLANETARY SURFACES" 119 | }, 120 | { 121 | "keyword": "mineral", 122 | "probability": 0.5222469568252563, 123 | "unstemmed": "MINERALS" 124 | } 125 | ] 126 | ], 127 | "topic_probabilities": [ 128 | [ 129 | { 130 | "keyword": "space science", 131 | "probability": 0.9478434554989881, 132 | "unstemmed": "space sciences" 133 | }, 134 | { 135 | "keyword": "geoscience", 136 | "probability": 0.3247275011723163, 137 | "unstemmed": "geosciences" 138 | }, 139 | { 140 | "keyword": "life science", 141 | "probability": 0.09660761692405866, 142 | "unstemmed": "life sciences" 143 | }, 144 | { 145 | "keyword": "astronautic", 146 | "probability": 0.07529697551685584, 147 | "unstemmed": "astronautics" 148 | }, 149 | { 150 | "keyword": "general", 151 | "probability": 0.0257089739623935, 152 | "unstemmed": "general" 153 | }, 154 | { 155 | "keyword": "chemistry and material", 156 | "probability": 0.014512189462108511, 157 | "unstemmed": "chemistry and materials" 158 | }, 159 | { 160 | "keyword": "aeronautic", 161 | "probability": 0.011811348467138511, 162 | "unstemmed": "aeronautics" 163 | }, 164 | { 165 | "keyword": "engineering", 166 | "probability": 0.01105199628144047, 167 | "unstemmed": "engineering" 168 | }, 169 | { 170 | "keyword": "social and information science", 171 | "probability": 0.010935423720128255, 172 | "unstemmed": "social and information sciences" 173 | }, 174 | { 175 | "keyword": 
"mathematical and computer science", 176 | "probability": 0.006348297628701306, 177 | "unstemmed": "mathematical and computer sciences" 178 | }, 179 | { 180 | "keyword": "physic", 181 | "probability": 0.0052230788076608765, 182 | "unstemmed": "physics" 183 | } 184 | ] 185 | ], 186 | "topic_threshold": 0.9 187 | }, 188 | "service_version": "unspecified", 189 | "status": "okay" 190 | } 191 | -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/.gitkeep -------------------------------------------------------------------------------- /models/test/cat_raw2lemma.json: -------------------------------------------------------------------------------- 1 | {"chemistry and materials": "chemistry and materials", "aeronautics": "aeronautics", "geosciences": "geosciences", "physics": "physics", "mathematical and computer sciences": "mathematical and computer sciences", "life sciences": "life sciences", "social and information sciences": "social and information sciences", "space sciences": "space sciences", "astronautics": "astronautics", "engineering": "engineering"} -------------------------------------------------------------------------------- /models/test/categories/models/chemistry and materials.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/categories/models/chemistry and materials.pkl -------------------------------------------------------------------------------- /models/test/categories/models/engineering.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/categories/models/engineering.pkl -------------------------------------------------------------------------------- /models/test/categories/models/geosciences.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/categories/models/geosciences.pkl -------------------------------------------------------------------------------- /models/test/categories/models/life sciences.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/categories/models/life sciences.pkl -------------------------------------------------------------------------------- /models/test/categories/models/physics.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/categories/models/physics.pkl -------------------------------------------------------------------------------- /models/test/categories/models/space sciences.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/categories/models/space sciences.pkl -------------------------------------------------------------------------------- /models/test/config.yml: -------------------------------------------------------------------------------- 1 | weights: # assign weights for term types specified in process section 2 | NOUN: 1 3 | PROPN: 1 4 | NOUN_CHUNK: 1 5 | ENT: 1 6 | ACRONYM: 1 7 | min_feature_occurrence: 10 8 | max_feature_occurrence: 0.9 9 | min_concept_occurrence: 5 10 | 
-------------------------------------------------------------------------------- /models/test/feature_matrix.jbl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/feature_matrix.jbl -------------------------------------------------------------------------------- /models/test/keywords/models/topic_/finite element method.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/keywords/models/topic_/finite element method.pkl -------------------------------------------------------------------------------- /models/test/kwd_raw2lemma.json: -------------------------------------------------------------------------------- 1 | {"SUBROUTINE LIBRARIES (COMPUTERS)": "subroutine libraries (computers)", "STRUCTURAL RELIABILITY": "structural reliability", "PLASMA FREQUENCIES": "plasma frequencies", "ASTRONOMICAL MODELS": "astronomical models", "SHAFTS (MACHINE ELEMENTS)": "shafts (machine elements)", "FLAT PLATES": "flat plates", "AIRCRAFT DESIGN": "aircraft design", "SCATTERING": "scattering", "SKY SURVEYS (ASTRONOMY)": "sky surveys (astronomy)", "CIRCUIT RELIABILITY": "circuit reliability", "ALEXANDRITE": "alexandrite", "FINANCE": "finance", "LANDSAT 6": "landsat 6", "CONCENTRATORS": "concentrators", "SYNCHRONOUS SATELLITES": "synchronous satellites", "IRON": "iron", "STRESS CONCENTRATION": "stress concentration", "CLEAN ENERGY": "clean energy", "SURVEYS": "surveys", "BRIGHTNESS TEMPERATURE": "brightness temperature", "COOLANTS": "coolants", "TRENDS": "trends", "BLOOD": "blood", "SHEAR FLOW": "shear flow", "COAXIAL PLASMA ACCELERATORS": "coaxial plasma accelerators", "SPACE PROBES": "space probes", "MOIRE INTERFEROMETRY": "moire interferometry", "TURBULENT FLOW": "turbulent flow", "SOLAR SPECTRA": 
"solar spectra", "LIFE SCIENCES": "life sciences", "STREAM FUNCTIONS (FLUIDS)": "stream functions (fluids)", "PHYSIOLOGY": "physiology", "INVERSIONS": "inversions", "KNEE (ANATOMY)": "knee (anatomy)", "MILLIMETER WAVES": "millimeter waves", "LIQUID LITHIUM": "liquid lithium", "ENTROPY": "entropy", "BIOMASS BURNING": "biomass burning", "NEBULAE": "nebulae", "OPTICAL PROPERTIES": "optical properties", "IMAGE ENHANCEMENT": "image enhancement", "CONTROL CONFIGURED VEHICLES": "control configured vehicles", "VARIATIONAL PRINCIPLES": "variational principles", "MICROSTRUCTURE": "microstructure", "MOUNTING": "mounting", "DAMAGE": "damage", "OCEANOGRAPHY": "oceanography", "PHOTOELASTIC MATERIALS": "photoelastic materials", "VORTICITY": "vorticity", "VOLTERRA EQUATIONS": "volterra equations", "GAS JETS": "gas jets", "HEAT PIPES": "heat pipes", "CONTROLLERS": "controllers", "LASER OUTPUTS": "laser outputs", "ARAMID FIBERS": "aramid fibers", "SCHWARZSCHILD METRIC": "schwarzschild metric", "ROTATIONAL SPECTRA": "rotational spectra", "HILBERT SPACE": "hilbert space", "TIME DEPENDENCE": "time dependence", "MOLECULAR STRUCTURE": "molecular structure", "ELECTROMAGNETIC PULSES": "electromagnetic pulses", "IMAGERY": "imagery", "LANDSAT SATELLITES": "landsat satellites", "VARIATIONS": "variations", "GLOBULAR CLUSTERS": "globular clusters", "INFORMATION DISSEMINATION": "information dissemination", "VIBRATION": "vibration", "NONLINEAR OPTICS": "nonlinear optics", "BUOYANCY": "buoyancy", "INDIA": "india", "MODULATION": "modulation", "MOLECULAR RELAXATION": "molecular relaxation", "Q SWITCHED LASERS": "q switched lasers", "SENSITIVITY": "sensitivity", "TECHNOLOGY UTILIZATION": "technology utilization", "FLOW CHARACTERISTICS": "flow characteristics", "OPTICAL WAVEGUIDES": "optical waveguides", "TWO FLUID MODELS": "two fluid models", "SELF ADAPTIVE CONTROL SYSTEMS": "self adaptive control systems", "FIELD-PROGRAMMABLE GATE ARRAYS": "field-programmable gate arrays", "BLOCK DIAGRAMS": "block 
diagrams", "THERMAL RESISTANCE": "thermal resistance", "SUBSONIC AIRCRAFT": "subsonic aircraft", "SPACE TRANSPORTATION": "space transportation", "ASTRONOMICAL PHOTOMETRY": "astronomical photometry", "ALUMINUM ALLOYS": "aluminum alloys", "CELLS (BIOLOGY)": "cells (biology)", "LINEAR PROGRAMMING": "linear programming", "ENERGY STORAGE": "energy storage", "LIGHT SPEED": "light speed", "AIRCRAFT CONFIGURATIONS": "aircraft configurations", "PROJECT MANAGEMENT": "project management", "IRRADIATION": "irradiation", "QUASARS": "quasars", "RESOLUTION": "resolution", "SIMULATION": "simulation", "PIPELINES": "pipelines", "COMMUNICATION NETWORKS": "communication networks", "PLASMA SPECTRA": "plasma spectra", "DUCTILITY": "ductility", "CONVECTIVE HEAT TRANSFER": "convective heat transfer", "HYPOVOLEMIA": "hypovolemia", "ELECTRO-OPTICS": "electro-optics", "ARMOR": "armor", "APOPTOSIS": "apoptosis", "METHODOLOGY": "methodology", "SOLAR ENERGY CONVERSION": "solar energy conversion", "LIGHT TRANSMISSION": "light transmission", "FAINT OBJECTS": "faint objects", "DATA ACQUISITION": "data acquisition", "CRACKING (FRACTURING)": "cracking (fracturing)", "PHOTONS": "photons", "SOLAR ATMOSPHERE": "solar atmosphere", "HEART RATE": "heart rate", "SILICON NITRIDES": "silicon nitrides", "FUNCTIONS (MATHEMATICS)": "functions (mathematics)", "IONIC COLLISIONS": "ionic collisions", "PHOTOMETERS": "photometers", "CARTILAGE": "cartilage", "INCOMPRESSIBLE FLOW": "incompressible flow", "NICKEL ALLOYS": "nickel alloys", "ORION NEBULA": "orion nebula", "TELEVISION EQUIPMENT": "television equipment", "SEA WATER": "sea water", "DIELECTRICS": "dielectrics", "FAILURE MODES": "failure modes", "MAGNETOHYDRODYNAMIC STABILITY": "magnetohydrodynamic stability", "OLIVINE": "olivine", "VELOCITY DISTRIBUTION": "velocity distribution", "RANDOM LOADS": "random loads", "IN VITRO METHODS AND TESTS": "in vitro methods and tests", "GROUND EFFECT (AERODYNAMICS)": "ground effect (aerodynamics)", "LOAD CARRYING CAPACITY": 
"load carrying capacity", "SOLAR WIND": "solar wind", "FUNCTIONALLY GRADIENT MATERIALS": "functionally gradient materials", "STIFFNESS MATRIX": "stiffness matrix", "REACTOR MATERIALS": "reactor materials", "RESEARCH AND DEVELOPMENT": "research and development", "METHOD OF MOMENTS": "method of moments", "MECHANICAL PROPERTIES": "mechanical properties", "ENTHALPY": "enthalpy", "SIGNS AND SYMPTOMS": "signs and symptoms", "FIXED WINGS": "fixed wings", "TWO DIMENSIONAL FLOW": "two dimensional flow", "SOFTWARE DEVELOPMENT TOOLS": "software development tools", "REINFORCED SHELLS": "reinforced shells", "TRANSFER FUNCTIONS": "transfer functions", "STELLAR MASS ACCRETION": "stellar mass accretion", "CARBON": "carbon", "THIN FILMS": "thin films", "DYNAMIC RANGE": "dynamic range", "AIR FLOW": "air flow", "CONTINUOUS SPECTRA": "continuous spectra", "COST EFFECTIVENESS": "cost effectiveness", "AEROSPACE INDUSTRY": "aerospace industry", "ASYMPTOTIC METHODS": "asymptotic methods", "WING LOADING": "wing loading", "INFRARED SPECTROSCOPY": "infrared spectroscopy", "ANGLE OF ATTACK": "angle of attack", "HYPERVELOCITY IMPACT": "hypervelocity impact", "PROTEINS": "proteins", "WATER VAPOR": "water vapor", "CMOS": "cmos", "COST REDUCTION": "cost reduction", "IRON COMPOUNDS": "iron compounds", "CROSSLINKING": "crosslinking", "CRAY COMPUTERS": "cray computers", "X RAY DIFFRACTION": "x ray diffraction", "WASTE DISPOSAL": "waste disposal", "LYMAN ALPHA RADIATION": "lyman alpha radiation", "CONTROL SYSTEMS DESIGN": "control systems design", "RUBY LASERS": "ruby lasers", "TAIL SURFACES": "tail surfaces", "REACTOR DESIGN": "reactor design", "VISCOUS FLOW": "viscous flow", "COMMAND GUIDANCE": "command guidance", "SOUTHEAST ASIA": "southeast asia", "WATER": "water", "THERMAL EXPANSION": "thermal expansion", "SHOT NOISE": "shot noise", "JOINTS (ANATOMY)": "joints (anatomy)", "LOADS (FORCES)": "loads (forces)", "STRATIFICATION": "stratification", "PERFORMANCE TESTS": "performance tests", "DELTA 
FUNCTION": "delta function", "TITANIUM OXIDES": "titanium oxides", "SITE SELECTION": "site selection", "LOWER BODY NEGATIVE PRESSURE": "lower body negative pressure", "INJURIES": "injuries", "ENGINE PARTS": "engine parts", "BLOOD VOLUME": "blood volume", "STRUCTURAL VIBRATION": "structural vibration", "SPACE MISSIONS": "space missions", "HOLOGRAPHY": "holography", "PATHOLOGY": "pathology", "CELL DIVISION": "cell division", "AXES OF ROTATION": "axes of rotation", "VISCOELASTICITY": "viscoelasticity", "AMPLIFICATION": "amplification", "PULSED LASERS": "pulsed lasers", "PRODUCTION ENGINEERING": "production engineering", "GRAPHICAL USER INTERFACE": "graphical user interface", "METALLURGY": "metallurgy", "COMPUTERIZED SIMULATION": "computerized simulation", "IMAGE PROCESSING": "image processing", "RANDOM ACCESS MEMORY": "random access memory", "BIOASTRONAUTICS": "bioastronautics", "BRAIN CIRCULATION": "brain circulation", "REFLECTING TELESCOPES": "reflecting telescopes", "ABSORPTION SPECTROSCOPY": "absorption spectroscopy", "SHOCK HEATING": "shock heating", "LOGIC CIRCUITS": "logic circuits", "ADAPTIVE CONTROL": "adaptive control", "CRYSTALLOGRAPHY": "crystallography", "KINETIC EQUATIONS": "kinetic equations", "ROCK INTRUSIONS": "rock intrusions", "PERTURBATION THEORY": "perturbation theory", "CAST ALLOYS": "cast alloys", "STABILITY": "stability", "FUNCTIONAL ANALYSIS": "functional analysis", "RED SHIFT": "red shift", "CRUDE OIL": "crude oil", "HYDROGEN": "hydrogen", "VELOCITY ERRORS": "velocity errors", "LIGHT BEAMS": "light beams", "SOLAR SYSTEM": "solar system", "DECOMPOSITION": "decomposition", "STEELS": "steels", "MAGNETIC DISTURBANCES": "magnetic disturbances", "HOT STARS": "hot stars", "FAILURE ANALYSIS": "failure analysis", "NEVADA": "nevada", "PERIODIC VARIATIONS": "periodic variations", "ITERATIVE SOLUTION": "iterative solution", "RELAXATION (MECHANICS)": "relaxation (mechanics)", "ELECTROMAGNETIC INTERACTIONS": "electromagnetic interactions", "MAGNETIC 
MOMENTS": "magnetic moments", "TEST FACILITIES": "test facilities", "MICROWAVE SPECTRA": "microwave spectra", "HORMONES": "hormones", "PLASMA HEATING": "plasma heating", "MATERIALS TESTS": "materials tests", "COSMOLOGY": "cosmology", "MAGNETIC SWITCHING": "magnetic switching", "SPECTRUM ANALYSIS": "spectrum analysis", "FLOW VELOCITY": "flow velocity", "MICRODENSITOMETERS": "microdensitometers", "PERFORMANCE PREDICTION": "performance prediction", "TURBULENCE EFFECTS": "turbulence effects", "SPHERES": "spheres", "MAPS": "maps", "MAGNIFICATION": "magnification", "DIAMAGNETISM": "diamagnetism", "CURRENT DENSITY": "current density", "SPACE SHUTTLES": "space shuttles", "EARTH MOVEMENTS": "earth movements", "XENON": "xenon", "PARTICLE IN CELL TECHNIQUE": "particle in cell technique", "SELF OSCILLATION": "self oscillation", "INCOMPRESSIBLE FLUIDS": "incompressible fluids", "BONE DEMINERALIZATION": "bone demineralization", "SOLAR RADIATION": "solar radiation", "ENERGY CONVERSION EFFICIENCY": "energy conversion efficiency", "NAVIER-STOKES EQUATION": "navier-stokes equation", "ETHYL COMPOUNDS": "ethyl compounds", "CHEMICAL EVOLUTION": "chemical evolution", "HYDROGEN CLOUDS": "hydrogen clouds", "STELLAR MODELS": "stellar models", "SPECTROMETERS": "spectrometers", "SPACE EXPLORATION": "space exploration", "FINITE ELEMENT METHOD": "finite element method", "SEMICONDUCTOR DEVICES": "semiconductor devices", "HIGH RESOLUTION": "high resolution", "FUSION (MELTING)": "fusion (melting)", "MOLECULAR COLLISIONS": "molecular collisions", "INTERFEROMETRY": "interferometry", "DIGITAL TECHNIQUES": "digital techniques", "ROTATING DISKS": "rotating disks", "ANGULAR MOMENTUM": "angular momentum", "DIFFERENTIATION (BIOLOGY)": "differentiation (biology)", "CAVITY RESONATORS": "cavity resonators", "ATOMIC COLLISIONS": "atomic collisions", "DUCTED FLOW": "ducted flow", "SPECTRAL LINE WIDTH": "spectral line width", "RADIATION DOSAGE": "radiation dosage", "FORCED CONVECTION": "forced convection", 
"MOLECULAR SPECTRA": "molecular spectra", "WAVE GENERATION": "wave generation", "STRESS-STRAIN RELATIONSHIPS": "stress-strain relationships", "OHMS LAW": "ohms law", "SHOCK TUBES": "shock tubes", "POLYNUCLEOTIDES": "polynucleotides", "DOMAINS": "domains", "ELECTRONIC COUNTERMEASURES": "electronic countermeasures", "BLOOD FLOW": "blood flow", "ERROR ANALYSIS": "error analysis", "FILTERS": "filters", "VERTICAL ORIENTATION": "vertical orientation", "STRESSES": "stresses", "ION TEMPERATURE": "ion temperature", "PHOTOGRAPHIC MEASUREMENT": "photographic measurement", "ISOMERIZATION": "isomerization", "SILICON CONTROLLED RECTIFIERS": "silicon controlled rectifiers", "LINE OF SIGHT COMMUNICATION": "line of sight communication", "BREEDER REACTORS": "breeder reactors", "CURRENT DISTRIBUTION": "current distribution", "SOFTWARE ENGINEERING": "software engineering", "ELECTROCARDIOGRAPHY": "electrocardiography", "THETA PINCH": "theta pinch", "VIBRATIONAL SPECTRA": "vibrational spectra", "SIGNATURES": "signatures", "EUROPEAN SPACE PROGRAMS": "european space programs", "MAGNITUDE": "magnitude", "FORMYL IONS": "formyl ions", "REMOTE SENSING": "remote sensing", "SOLAR CORONA": "solar corona", "EARTH RESOURCES": "earth resources", "GRAPHITE-EPOXY COMPOSITES": "graphite-epoxy composites", "BURNERS": "burners", "TUNABLE LASERS": "tunable lasers", "WINDPOWER UTILIZATION": "windpower utilization", "PIEZOELECTRICITY": "piezoelectricity", "CULTURE MEDIA": "culture media", "MARKETING": "marketing", "CATTLE": "cattle", "RADIATION DISTRIBUTION": "radiation distribution", "IMAGE RESOLUTION": "image resolution", "ALGORITHMS": "algorithms", "MAGNETIC FLUX": "magnetic flux", "REAGENTS": "reagents", "MAGNETIC SURVEYS": "magnetic surveys", "ADIPOSE TISSUES": "adipose tissues", "CURRENT SHEETS": "current sheets", "DIFFUSION": "diffusion", "GRAPHS (CHARTS)": "graphs (charts)", "THIN WALLED SHELLS": "thin walled shells", "BOUNDARY LAYER FLOW": "boundary layer flow", "MONTE CARLO METHOD": "monte carlo 
method", "LAMINAR FLOW": "laminar flow", "CIRCULAR ORBITS": "circular orbits", "SHOCK WAVES": "shock waves", "HIGH TEMPERATURE ENVIRONMENTS": "high temperature environments", "FETUSES": "fetuses", "GROUND WIND": "ground wind", "WAVE EXCITATION": "wave excitation", "SOFTWARE RELIABILITY": "software reliability", "PHOTOSPHERE": "photosphere", "BALLISTICS": "ballistics", "MICROPROCESSORS": "microprocessors", "BOUNDARY CONDITIONS": "boundary conditions", "WEATHER FORECASTING": "weather forecasting", "BURNING RATE": "burning rate", "WATER WAVES": "water waves", "MINERALOGY": "mineralogy", "PHASE VELOCITY": "phase velocity", "BONES": "bones", "CYTOLOGY": "cytology", "SYNTHETIC RESINS": "synthetic resins", "APPROXIMATION": "approximation", "RADIATION HARDENING": "radiation hardening", "EARTH OBSERVATIONS (FROM SPACE)": "earth observations (from space)", "PHOTOGRAPHIC PLATES": "photographic plates", "FIREBALLS": "fireballs", "CERENKOV RADIATION": "cerenkov radiation", "SOLAR CELLS": "solar cells", "POLYMER MATRIX COMPOSITES": "polymer matrix composites", "ADULTS": "adults", "ELECTRIC SPARKS": "electric sparks", "CONFERENCES": "conferences", "SURFACE TO AIR MISSILES": "surface to air missiles", "ENERGY CONSERVATION": "energy conservation", "MAGNETIC ANOMALIES": "magnetic anomalies", "ACOUSTIC EMISSION": "acoustic emission", "SUPERSONIC NOZZLES": "supersonic nozzles", "SERUMS": "serums", "RIBONUCLEIC ACIDS": "ribonucleic acids", "REYNOLDS NUMBER": "reynolds number", "ARITHMETIC": "arithmetic", "CORRELATION COEFFICIENTS": "correlation coefficients", "MICE": "mice", "ETHANE": "ethane", "ASTROMETRY": "astrometry", "CRYSTALLIZATION": "crystallization", "THERMOELASTICITY": "thermoelasticity", "CORONAL HOLES": "coronal holes", "ATMOSPHERIC COMPOSITION": "atmospheric composition", "TURBULENT MIXING": "turbulent mixing", "COMPUTER PROGRAMMING": "computer programming", "QUADRATURES": "quadratures", "RELATIVISTIC PARTICLES": "relativistic particles", "CONVERGENT-DIVERGENT NOZZLES": 
"convergent-divergent nozzles", "THERMODYNAMIC PROPERTIES": "thermodynamic properties", "MOLYBDENUM": "molybdenum", "NASA PROGRAMS": "nasa programs", "PLANAR STRUCTURES": "planar structures", "SURGERY": "surgery", "MESSAGE PROCESSING": "message processing", "ASTRONOMICAL PHOTOGRAPHY": "astronomical photography", "VAPORIZING": "vaporizing", "AIRCRAFT CONTROL": "aircraft control", "TISSUE ENGINEERING": "tissue engineering", "ADENOSINE TRIPHOSPHATE": "adenosine triphosphate", "RATS": "rats", "PHOTOVOLTAIC CONVERSION": "photovoltaic conversion", "GENETIC ALGORITHMS": "genetic algorithms", "LOW TEMPERATURE": "low temperature", "GREEN'S FUNCTIONS": "green's functions", "MAGNETOHYDRODYNAMIC GENERATORS": "magnetohydrodynamic generators", "COMPUTER GRAPHICS": "computer graphics", "ELASTIC PROPERTIES": "elastic properties", "PLASMA CONTROL": "plasma control", "NUCLEAR FUELS": "nuclear fuels", "DISEASES": "diseases", "GAS FLOW": "gas flow", "COMBUSTION CHAMBERS": "combustion chambers", "ENERGY TECHNOLOGY": "energy technology", "OXYGEN": "oxygen", "DATA REDUCTION": "data reduction", "ISOTHERMAL PROCESSES": "isothermal processes", "SINGULARITY (MATHEMATICS)": "singularity (mathematics)", "FIZEAU EFFECT": "fizeau effect", "VELOCITY MEASUREMENT": "velocity measurement", "PLASMA POTENTIALS": "plasma potentials", "HIGH SPEED": "high speed", "WORLD WIDE WEB": "world wide web", "CIRCULAR TUBES": "circular tubes", "SUBMILLIMETER WAVES": "submillimeter waves", "SURFACE WAVES": "surface waves", "TENSORS": "tensors", "FREQUENCIES": "frequencies", "POWDER METALLURGY": "powder metallurgy", "HABITATS": "habitats", "SCRAMBLING (COMMUNICATION)": "scrambling (communication)", "RADIOACTIVE WASTES": "radioactive wastes", "RING LASERS": "ring lasers", "UNSTEADY FLOW": "unsteady flow", "POROUS BOUNDARY LAYER CONTROL": "porous boundary layer control", "BLOOD CIRCULATION": "blood circulation", "TRANSPORT AIRCRAFT": "transport aircraft", "SAFETY MANAGEMENT": "safety management", "CONTINUITY 
EQUATION": "continuity equation", "ROOM TEMPERATURE": "room temperature", "PERMALLOYS (TRADEMARK)": "permalloys (trademark)", "PLASMA DENSITY": "plasma density", "TECHNOLOGY ASSESSMENT": "technology assessment", "PRESSURE DISTRIBUTION": "pressure distribution", "MANAGEMENT PLANNING": "management planning", "GRID GENERATION (MATHEMATICS)": "grid generation (mathematics)", "ELECTRON MICROSCOPY": "electron microscopy", "QUANTUM ELECTRONICS": "quantum electronics", "RADIAL VELOCITY": "radial velocity", "FINITE DIFFERENCE THEORY": "finite difference theory", "MALES": "males", "RADII": "radii", "LASER MODES": "laser modes", "RANDOM NOISE": "random noise", "DROSOPHILA": "drosophila", "CRYOGENIC COOLING": "cryogenic cooling", "TRANSPORTATION ENERGY": "transportation energy", "ENERGY DISSIPATION": "energy dissipation", "BOUNDARY VALUE PROBLEMS": "boundary value problems", "BLANKETS (FUSION REACTORS)": "blankets (fusion reactors)", "ROTARY WING AIRCRAFT": "rotary wing aircraft", "EMISSION SPECTRA": "emission spectra", "OBJECT-ORIENTED PROGRAMMING": "object-oriented programming", "PRODUCTION MANAGEMENT": "production management", "FLOW GEOMETRY": "flow geometry", "CHIPS (ELECTRONICS)": "chips (electronics)", "RISK": "risk", "HANKEL FUNCTIONS": "hankel functions", "DERIVATION": "derivation", "STATIC DEFORMATION": "static deformation", "AGRICULTURE": "agriculture", "POLYATOMIC MOLECULES": "polyatomic molecules", "PHOTOGRAMMETRY": "photogrammetry", "PHYSICAL OPTICS": "physical optics", "AXES (REFERENCE LINES)": "axes (reference lines)", "JAVA (PROGRAMMING LANGUAGE)": "java (programming language)", "FABRICS": "fabrics", "SILICON FILMS": "silicon films", "PLASMA DIAGNOSTICS": "plasma diagnostics", "INTERNATIONAL COOPERATION": "international cooperation", "LUMINOUS INTENSITY": "luminous intensity", "WAVE PROPAGATION": "wave propagation", "LINEAR ENERGY TRANSFER (LET)": "linear energy transfer (let)", "SOLAR CORPUSCULAR RADIATION": "solar corpuscular radiation", "GEOSAT SATELLITES": 
"geosat satellites", "FINITE VOLUME METHOD": "finite volume method", "COORDINATE TRANSFORMATIONS": "coordinate transformations", "CONTROL THEORY": "control theory", "ATLANTIC OCEAN": "atlantic ocean", "STOCHASTIC PROCESSES": "stochastic processes", "ASSESSMENTS": "assessments", "NOISE REDUCTION": "noise reduction", "MAGNETOHYDRODYNAMIC FLOW": "magnetohydrodynamic flow", "ION BEAMS": "ion beams", "ASTRONOMICAL CATALOGS": "astronomical catalogs", "GAS LASERS": "gas lasers", "SCREWS": "screws", "HELMETS": "helmets", "INTERSTELLAR MATTER": "interstellar matter", "IDEAL GAS": "ideal gas", "METEOROIDS": "meteoroids", "HIGH TEMPERATURE PLASMAS": "high temperature plasmas", "GENETICS": "genetics", "COMPUTER PROGRAMS": "computer programs", "EARTH SURFACE": "earth surface", "BODIES OF REVOLUTION": "bodies of revolution", "SOUTH CAROLINA": "south carolina", "ECONOMIC ANALYSIS": "economic analysis", "LAPLACE TRANSFORMATION": "laplace transformation", "REGENERATIVE COOLING": "regenerative cooling", "OSCILLATORS": "oscillators", "ROTORS": "rotors", "RESEARCH": "research", "DEOXYRIBONUCLEIC ACID": "deoxyribonucleic acid", "ROOT-MEAN-SQUARE ERRORS": "root-mean-square errors", "HYDROCYANIC ACID": "hydrocyanic acid", "HELIUM": "helium", "VAPOR PHASE EPITAXY": "vapor phase epitaxy", "NONFERROUS METALS": "nonferrous metals", "SPACELAB": "spacelab", "WAFERS": "wafers", "STELLAR MASS": "stellar mass", "SLENDER BODIES": "slender bodies", "LINEAR ACCELERATORS": "linear accelerators", "TABLES (DATA)": "tables (data)", "ION ACOUSTIC WAVES": "ion acoustic waves", "WALLS": "walls", "RADIOGRAPHY": "radiography", "CYANIDES": "cyanides", "X RAY SOURCES": "x ray sources", "TIME SIGNALS": "time signals", "STATISTICAL ANALYSIS": "statistical analysis", "CHARGE COUPLED DEVICES": "charge coupled devices", "WIND TUNNEL MODELS": "wind tunnel models", "COHERENT LIGHT": "coherent light", "ENVIRONMENT EFFECTS": "environment effects", "THICKNESS": "thickness", "LINEAR SYSTEMS": "linear systems", "PLANETS": 
"planets", "SATELLITE OBSERVATION": "satellite observation", "KERR MAGNETOOPTICAL EFFECT": "kerr magnetooptical effect", "OSTEOBLASTS": "osteoblasts", "CARBON MONOXIDE LASERS": "carbon monoxide lasers", "WINDMILLS (WINDPOWERED MACHINES)": "windmills (windpowered machines)", "SIGNAL PROCESSING": "signal processing", "OPTIMIZATION": "optimization", "HIGH SPEED CAMERAS": "high speed cameras", "THERMAL STRESSES": "thermal stresses", "ELECTRON BUNCHING": "electron bunching", "LASER CAVITIES": "laser cavities", "LUNAR BASES": "lunar bases", "LOAD DISTRIBUTION (FORCES)": "load distribution (forces)", "FUEL COMBUSTION": "fuel combustion", "CARBON MONOXIDE": "carbon monoxide", "KEPLER LAWS": "kepler laws", "SUPERSONIC FLOW": "supersonic flow", "STEADY FLOW": "steady flow", "SYNCOPE": "syncope", "CARDIAC OUTPUT": "cardiac output", "AERONAUTICAL ENGINEERING": "aeronautical engineering", "MATRICES (MATHEMATICS)": "matrices (mathematics)", "AERODYNAMIC CHARACTERISTICS": "aerodynamic characteristics", "STRUCTURAL PROPERTIES (GEOLOGY)": "structural properties (geology)", "TEMPERATURE MEASUREMENT": "temperature measurement", "WIND VANES": "wind vanes", "BLACK HOLES (ASTRONOMY)": "black holes (astronomy)", "ELECTRONIC EQUIPMENT": "electronic equipment", "POROSITY": "porosity", "FEMALES": "females", "SHELL STABILITY": "shell stability", "GEOELECTRICITY": "geoelectricity", "IMPACT RESISTANCE": "impact resistance", "POLYNOMIALS": "polynomials", "ANNUAL VARIATIONS": "annual variations", "FIELD EFFECT TRANSISTORS": "field effect transistors", "ELECTROMECHANICS": "electromechanics", "LIFE SUPPORT SYSTEMS": "life support systems", "PLY ORIENTATION": "ply orientation", "COST ANALYSIS": "cost analysis", "ALUMINUM NITRIDES": "aluminum nitrides", "SOUNDING": "sounding", "TROPOSPHERE": "troposphere", "MODULARITY": "modularity", "LUMINOSITY": "luminosity", "THERMAL REACTORS": "thermal reactors", "INTERLAYERS": "interlayers", "LANDING GEAR": "landing gear", "CYLINDRICAL BODIES": "cylindrical 
bodies", "MOLECULAR CLOUDS": "molecular clouds", "SCANNING": "scanning", "EQUATIONS OF STATE": "equations of state", "APPLICATIONS PROGRAMS (COMPUTERS)": "applications programs (computers)", "CHARACTERIZATION": "characterization", "INTERNETS": "internets", "HORMONE METABOLISMS": "hormone metabolisms", "SYNTHETIC FIBERS": "synthetic fibers", "TRANSPLANTATION": "transplantation", "TRANSPORT THEORY": "transport theory", "PARALLAX": "parallax", "BIOSYNTHESIS": "biosynthesis", "CONTAMINANTS": "contaminants", "CUSHIONS": "cushions", "SPACE TUGS": "space tugs", "IMAGING TECHNIQUES": "imaging techniques", "SOLID STATE DEVICES": "solid state devices", "PACKET SWITCHING": "packet switching", "TIME RESPONSE": "time response", "ANALOGS": "analogs", "INFRARED LASERS": "infrared lasers", "VORTICITY EQUATIONS": "vorticity equations", "ACRYLONITRILES": "acrylonitriles", "LIGHT SOURCES": "light sources", "DIGITAL COMPUTERS": "digital computers", "GAS TURBINE ENGINES": "gas turbine engines", "PROTECTIVE CLOTHING": "protective clothing", "NATURAL GAS": "natural gas", "MATHEMATICAL MODELS": "mathematical models", "COMPUTATIONAL FLUID DYNAMICS": "computational fluid dynamics", "GAS EXPLOSIONS": "gas explosions", "STRUCTURAL DESIGN": "structural design", "USER REQUIREMENTS": "user requirements", "CHANNEL FLOW": "channel flow", "LIGHT EMISSION": "light emission", "STROKE VOLUME": "stroke volume", "GLUCOCORTICOIDS": "glucocorticoids", "OLIGONUCLEOTIDES": "oligonucleotides", "MOLECULAR SPECTROSCOPY": "molecular spectroscopy", "MOLTEN SALT ELECTROLYTES": "molten salt electrolytes", "AERODYNAMIC COEFFICIENTS": "aerodynamic coefficients", "SOLAR TERRESTRIAL INTERACTIONS": "solar terrestrial interactions", "INTEGRAL EQUATIONS": "integral equations", "FACTORIZATION": "factorization", "CYLINDRICAL SHELLS": "cylindrical shells", "ACTIVE CONTROL": "active control", "COMPUTER INFORMATION SECURITY": "computer information security", "TIME FUNCTIONS": "time functions", "DYNAMIC LOADS": "dynamic 
loads", "COMPUTATION": "computation", "QUANTUM STATISTICS": "quantum statistics", "TWO PHASE FLOW": "two phase flow", "INTERNATIONAL QUIET SUN YEAR": "international quiet sun year", "CREEP PROPERTIES": "creep properties", "ELECTRON ENERGY": "electron energy", "RAMAN SPECTRA": "raman spectra", "STABILITY AUGMENTATION": "stability augmentation", "RADIO FREQUENCY DISCHARGE": "radio frequency discharge", "LYMAN BETA RADIATION": "lyman beta radiation", "BIOREACTORS": "bioreactors", "GAS GIANT PLANETS": "gas giant planets", "GEOMAGNETISM": "geomagnetism", "SYSTEMS STABILITY": "systems stability", "LATE STARS": "late stars", "SHARPNESS": "sharpness", "TRUSSES": "trusses", "HYDRODYNAMIC EQUATIONS": "hydrodynamic equations", "MOMENT DISTRIBUTION": "moment distribution", "DYNAMIC RESPONSE": "dynamic response", "SUPERCONDUCTORS (MATERIALS)": "superconductors (materials)", "ULTRAVIOLET RADIATION": "ultraviolet radiation", "MOLECULAR ROTATION": "molecular rotation", "PETROLOGY": "petrology", "HYDRODYNAMICS": "hydrodynamics", "UNDERGROUND STRUCTURES": "underground structures", "LASERS": "lasers", "AXISYMMETRIC FLOW": "axisymmetric flow", "THERMODYNAMICS": "thermodynamics", "NUCLEOTIDES": "nucleotides", "X RAY ANALYSIS": "x ray analysis", "CULTURED CELLS": "cultured cells", "MANUAL CONTROL": "manual control", "LAMINATES": "laminates", "CROP GROWTH": "crop growth", "TISSUE CULTURING": "tissue culturing", "COMPUTATIONAL GRIDS": "computational grids", "PARTICLE INTERACTIONS": "particle interactions", "PYRANOMETERS": "pyranometers", "AERODYNAMIC INTERFERENCE": "aerodynamic interference", "SUPERCOMPUTERS": "supercomputers", "LINE SPECTRA": "line spectra", "LUNAR SURFACE": "lunar surface", "TELESCOPES": "telescopes", "LINEAR QUADRATIC GAUSSIAN CONTROL": "linear quadratic gaussian control", "OSTEOGENESIS": "osteogenesis", "LAMINAR HEAT TRANSFER": "laminar heat transfer", "STRESS DISTRIBUTION": "stress distribution", "SOFTWARE REUSE": "software reuse", "FREQUENCY RANGES": "frequency 
ranges", "CALCIUM": "calcium", "CAMERAS": "cameras", "IMAGE CONTRAST": "image contrast", "FURNACES": "furnaces", "CONVECTION": "convection", "NICKEL OXIDES": "nickel oxides", "A-4 AIRCRAFT": "a-4 aircraft", "METABOLIC DISEASES": "metabolic diseases", "ARGON": "argon", "TISSUES (BIOLOGY)": "tissues (biology)", "SIGNAL ENCODING": "signal encoding", "GLASS FIBER REINFORCED PLASTICS": "glass fiber reinforced plastics", "GALLIUM ARSENIDES": "gallium arsenides", "SOLUTES": "solutes", "ONCOGENES": "oncogenes", "SUBROUTINES": "subroutines", "AERIAL PHOTOGRAPHY": "aerial photography", "EIGENVALUES": "eigenvalues", "PROTOCOL (COMPUTERS)": "protocol (computers)", "THREE DIMENSIONAL MODELS": "three dimensional models", "STRESS CORROSION CRACKING": "stress corrosion cracking", "ESTIMATING": "estimating", "METEORITE COLLISIONS": "meteorite collisions", "FLAT LAYERS": "flat layers", "ABUNDANCE": "abundance", "ENERGY TRANSFER": "energy transfer", "INEQUALITIES": "inequalities", "ELECTRIC GENERATORS": "electric generators", "WAVE FRONT RECONSTRUCTION": "wave front reconstruction", "EXPERIMENT DESIGN": "experiment design", "COMPOSITE WRAPPING": "composite wrapping", "WAVE DISPERSION": "wave dispersion", "MAGNETIC FIELDS": "magnetic fields", "HOLES (MECHANICS)": "holes (mechanics)", "CHEMICAL BONDS": "chemical bonds"} -------------------------------------------------------------------------------- /models/test/test_inds.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/test_inds.npy -------------------------------------------------------------------------------- /models/test/train_inds.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/train_inds.npy 
# --------------------------------------------------------------------------------
# /models/test/vectorizer.jbl:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/models/test/vectorizer.jbl
# --------------------------------------------------------------------------------
# /notebook/.gitignore:
# --------------------------------------------------------------------------------
# onnx_models
# *.db
# *.db-journal
# *.hdf5
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# Flask==1.1.1
# requests==2.20.0
# urllib3==1.26.5
# PyYAML==5.4
# setuptools-scm==3.3.3
# gevent==1.4.0
# gunicorn==20.0.4
# --------------------------------------------------------------------------------
# /service/.dockerignore:
# --------------------------------------------------------------------------------
# config/*
# !config/default_config.py
# --------------------------------------------------------------------------------
# /service/.gitignore:
# --------------------------------------------------------------------------------
# __pycache__/*
# --------------------------------------------------------------------------------
# /service/__init__.py:
# --------------------------------------------------------------------------------
# import service.app
# --------------------------------------------------------------------------------
# /service/app.py:
# --------------------------------------------------------------------------------
"""Flask application that serves the concept-tagging models over HTTP.

Importing this module reads the ``MODELS_DIR`` environment variable, fills
``app.config`` with the model file locations and calls :func:`init`, so the
classifier is loaded before the first request is handled.
"""
import json
import logging
import os
from pathlib import Path

import dsconcept.get_metrics as gm
import joblib
import yaml
from flask import request, jsonify, render_template, Flask

logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)

app = Flask(__name__)
# MODELS_DIR is mandatory: a missing variable raises KeyError at import time.
app.config["MODELS_DIR"] = os.environ["MODELS_DIR"]
LOG.info(f"Using MODELS_DIR \"{app.config['MODELS_DIR']}\"")

FIND_TERMS_METHOD_NAME = "findterms"
ALLOWED_FIND_TERMS_FIELDS = [
    "text",
    "probability_threshold",
    "topic_threshold",
    "request_id",
]

SERVICE_VERSION = os.environ.get("VERSION", "unspecified")

# PRELOAD defaults to True; only the (case-insensitive, stripped) value "true"
# enables it when the variable is set explicitly.
if "PRELOAD" in os.environ:
    if os.environ["PRELOAD"].lower().strip() == "true":
        PRELOAD = True
        LOG.info("Preloading the models.")
    else:
        PRELOAD = False
else:
    LOG.info("Not PRELOAD env variable found. Default to PRELOAD=True")
    PRELOAD = True

INTERFACE_VERSION = "2.0.0"
SERVICE_ROOT_PATH = ""

app.config.update(
    dict(
        CATEGORIES_DIR=f"{app.config['MODELS_DIR']}/categories/models",
        CONCEPTS_DIR=f"{app.config['MODELS_DIR']}/keywords/models",
        IN_KWD_RAW2LEMMA=f"{app.config['MODELS_DIR']}/kwd_raw2lemma.json",
        IN_CAT_RAW2LEMMA=f"{app.config['MODELS_DIR']}/cat_raw2lemma.json",
        IN_VECTORIZER=f"{app.config['MODELS_DIR']}/vectorizer.jbl",
        STI_CONFIG=f"{app.config['MODELS_DIR']}/config.yml",
        PRELOAD=PRELOAD,  # TODO: visible configuration for preload option
        LOC_DICT=Path("loc_dict.jbl"),
    )
)


def load_models_wrapper():
    """
    Return the concept models used by the hierarchical classifier.

    With PRELOAD enabled, everything is loaded through
    ``gm.load_concept_models``. Without it, the result of a previous
    ``load=False`` call is reused from the ``LOC_DICT`` file when that file
    exists; otherwise it is computed once and written to ``LOC_DICT``.
    """
    if app.config["PRELOAD"]:
        return gm.load_concept_models(
            app.config["CONCEPTS_DIR"], load=app.config["PRELOAD"]
        )

    loc_dict = app.config["LOC_DICT"]
    if loc_dict.exists():
        # NOTE: joblib.load unpickles the file; LOC_DICT is a relative path, so
        # this trusts whatever "loc_dict.jbl" sits in the working directory.
        return joblib.load(loc_dict)

    cd = gm.load_concept_models(
        app.config["CONCEPTS_DIR"], load=app.config["PRELOAD"]
    )
    joblib.dump(cd, loc_dict)
    return cd


def init():
    """Load lemma dictionaries, feature weights and the classifier into ``app.config``.

    Populates ``KWD_RAW2LEMMA``/``KWD_LEMMA2RAW``, ``CAT_RAW2LEMMA``/
    ``CAT_LEMMA2RAW``, ``WEIGHTS`` and ``HIER_CLF``.
    """
    LOG.info("Loading lemma dictionaries.")
    with open(app.config["IN_KWD_RAW2LEMMA"], "r") as f0:
        app.config["KWD_RAW2LEMMA"] = json.load(f0)
    # Inverse mapping, used to report the raw form of a predicted lemma.
    app.config["KWD_LEMMA2RAW"] = {
        v: k for k, v in app.config["KWD_RAW2LEMMA"].items()
    }

    with open(app.config["IN_CAT_RAW2LEMMA"], "r") as f0:
        app.config["CAT_RAW2LEMMA"] = json.load(f0)
    app.config["CAT_LEMMA2RAW"] = {
        v: k for k, v in app.config["CAT_RAW2LEMMA"].items()
    }

    with open(app.config["STI_CONFIG"], "r") as f0:
        cfg = yaml.safe_load(f0)
    app.config["WEIGHTS"] = cfg["weights"]

    LOG.info("Loading hierarchical classifier.")
    cat_clfs = gm.load_category_models(app.config["CATEGORIES_DIR"])
    cd = load_models_wrapper()
    hclf = gm.HierarchicalClassifier(cat_clfs, cd)
    hclf.load_vectorizer(app.config["IN_VECTORIZER"])
    app.config["HIER_CLF"] = hclf
    LOG.info('Ready for requests.')


def _to_records(preds, lemma2raw):
    """Convert per-text lists of (lemma, probability) pairs into response dicts."""
    return [
        [
            {
                "keyword": t[0],
                "probability": float(t[1]),
                "unstemmed": lemma2raw[t[0]],
            }
            for t in record_preds
        ]
        for record_preds in preds
    ]


def classify_sti(texts, cat_threshold, kwd_threshold, no_categories):
    """Vectorize *texts* and predict categories and concepts for each one.

    Args:
        texts: a single string or a list of strings. A single string is
            wrapped in a one-element list.
        cat_threshold: passed to the classifier's ``predict`` as the category
            threshold.
        kwd_threshold: passed to ``predict`` as the keyword threshold.
        no_categories: passed to ``predict`` unchanged.

    Returns:
        ``(features, cat_records, concept_records)``. The two record values
        hold one list per input text, including when a single string was
        passed; each record is a dict with ``keyword``, ``probability`` and
        ``unstemmed`` keys.

    Raises:
        KeyError: if a predicted lemma has no entry in the LEMMA2RAW mappings.
    """
    if isinstance(texts, str):
        texts = [texts]
    features, feature_matrix = app.config["HIER_CLF"].vectorize(
        texts, app.config["WEIGHTS"]
    )
    cat_preds, concept_preds = app.config["HIER_CLF"].predict(
        feature_matrix, cat_threshold, kwd_threshold, no_categories
    )
    cat_records = _to_records(cat_preds, app.config["CAT_LEMMA2RAW"])
    concept_records = _to_records(concept_preds, app.config["KWD_LEMMA2RAW"])
    # The original re-tested `type(texts) == str` here to unwrap single inputs,
    # but `texts` is always a list by this point, so that branch never ran.
    # It is removed rather than "fixed" to keep the response shape unchanged.
    return features, cat_records, concept_records


def _abort(code, msg, usage=True):
    """Build a JSON error response with HTTP status *code*.

    When *usage* is true, the list of allowed request fields is appended to
    *msg*.
    """
    if usage:
        # _find_terms_usage() leaves its last quote open; close it here.
        msg += " " + _find_terms_usage() + "'"

    response = jsonify(service_version=SERVICE_VERSION, msg=msg)
    response.status_code = code
    return response


def _find_terms_usage(joinstr="', '", prestr="'"):
    """Return a sentence listing the allowed request fields.

    The fields are joined with *joinstr* and preceded by *prestr*; the caller
    is expected to append the closing quote.
    """
    return (
        "Allowed request fields for "
        + FIND_TERMS_METHOD_NAME
        + " method are "
        + prestr
        + joinstr.join(ALLOWED_FIND_TERMS_FIELDS)
    )


def _validate(data):
    """Return True when every key of *data* is an allowed request field."""
    for key in data:
        if key not in ALLOWED_FIND_TERMS_FIELDS:
            LOG.warning(f'"{key}" not in {ALLOWED_FIND_TERMS_FIELDS}')
            return False
    return True


def _parse_threshold(data, field):
    """Return ``data[field]`` as a float.

    Raises:
        ValueError: if the field is absent, null, or not convertible to a
            float. ``find_terms`` reports ValueError as a 400 response.
    """
    try:
        return float(data[field])
    except KeyError:
        raise ValueError(f'Missing required field "{field}"') from None
    except TypeError:
        raise ValueError(f'Field "{field}" must be a number') from None


# Load all of the data here.
init()


@app.route(f"{SERVICE_ROOT_PATH}/")
def home():
    """Render the landing page."""
    LOG.info(f"Loading page: {request.host}/{request.path}")
    if "HIER_CLF" not in app.config:
        init()
    return render_template(
        "home.html",
        version=SERVICE_VERSION,
        interface_version=INTERFACE_VERSION,
        root=SERVICE_ROOT_PATH,
        # NOTE(review): request.path normally starts with "/", so this looks
        # like it renders "host//..." - confirm how home.html uses service_url
        # before changing it.
        service_url=f"{request.host}/{request.path}",
        methodname=FIND_TERMS_METHOD_NAME,
        usage=_find_terms_usage(joinstr='", "', prestr='"') + '"',
    )


@app.route(f"{SERVICE_ROOT_PATH}/{FIND_TERMS_METHOD_NAME}/", methods=["POST"])
def find_terms():
    """Tag the posted text and return the predictions as JSON.

    The body is parsed as JSON regardless of content type. It must be an
    object containing only the allowed fields, with ``text``,
    ``probability_threshold`` and ``topic_threshold`` present. A
    ``topic_threshold`` of exactly 1.0 sets ``no_categories`` for the
    classifier.

    Returns:
        A 200 JSON response with the payload, a 400 response for malformed or
        incomplete requests, or a 500 response for any other failure.
    """
    if "HIER_CLF" not in app.config:
        init()
    try:
        LOG.debug("Requesting information.")
        data = request.get_json(force=True)
        LOG.debug(f"Data received {str(data)}.")

        # A JSON list/number/null would otherwise fail later with a 500.
        if not isinstance(data, dict):
            return _abort(400, "Bad Request body must be a JSON object.")
        if not _validate(data):
            return _abort(400, "Bad Request incorrect field passed.")
        if "text" not in data:
            return _abort(400, "NO passed text field ..._abort.")

        text = data.get("text")
        topic_threshold = _parse_threshold(data, "topic_threshold")
        params = {
            "proba_lim": _parse_threshold(data, "probability_threshold"),
            "topic_threshold": topic_threshold,
            "request_id": str(data.get("request_id")),
            "no_categories": topic_threshold == 1.0,
        }
        LOG.info(f'probability limit: {params["proba_lim"]}')
        LOG.info(f'request_id: {params["request_id"]}')
        features, cat_records, concept_records = classify_sti(
            text,
            params["topic_threshold"],
            params["proba_lim"],
            params["no_categories"],
        )
        payload = {
            "sti_keywords": concept_records,
            "features": features,
            "topic_probabilities": cat_records,
            "request_id": params["request_id"],
            "probability_threshold": params["proba_lim"],
            "topic_threshold": params["topic_threshold"],
        }
        LOG.info("Request Complete")
        return jsonify(
            status="okay",
            code=200,
            messages=[],
            service_version=SERVICE_VERSION,
            interface_version=INTERFACE_VERSION,
            payload=payload,
        )

    except Exception as ex:
        # Top-level boundary: bad input becomes 400, everything else 500.
        # The BadRequest check is by name so no extra import is needed.
        if isinstance(ex, ValueError) or "BadRequest" in str(type(ex)):
            LOG.warning("Bad request: %s", ex)
            return _abort(400, str(ex) + ".")
        LOG.exception("Service Exception. Msg: %s", type(ex))
        return _abort(500, "Internal Service Error", usage=False)


if __name__ == "__main__":
    port = os.getenv("PORT")
    if port is None:
        LOG.info("Setting port to default value of 5000.")
        port = 5000
    app.run(
        host="0.0.0.0", port=int(port), threaded=True,
    )
# --------------------------------------------------------------------------------
# /service/config/default_config.py:
# --------------------------------------------------------------------------------
# MODELS_DIR = "models/experiment"
# --------------------------------------------------------------------------------
# /service/static/media/NASA_logo.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/service/static/media/NASA_logo.png
# --------------------------------------------------------------------------------
# /service/static/media/ajax-loader.gif:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/service/static/media/ajax-loader.gif
# --------------------------------------------------------------------------------
# /service/static/style/style.css:
# --------------------------------------------------------------------------------
# 1 | h1 { 2 | padding: 0px 20px; 3 | font-family: 'Montserrat', sans-serif; 4 | font-weight:bold 5 | } 6 | 7 | h2 { 8 |
padding: 0px 120px; 9 | font-family: 'Montserrat', sans-serif; 10 | } 11 | 12 | .returnBox{ 13 | padding: 0px 0px; 14 | } 15 | 16 | .returnBox h2{ 17 | padding: 0px 0px; 18 | margin: 0px 130px 0px 30px; 19 | } 20 | 21 | .background h2 { 22 | margin: 0px 130px 0px 150px; 23 | padding: 0px 0px 0px 0px; 24 | } 25 | 26 | p { 27 | padding: 5px 150px; 28 | font-family: 'Mukta', sans-serif; 29 | } 30 | 31 | #return { 32 | margin: 0px 0px 10px 0px; 33 | } 34 | 35 | .clientBox p { 36 | margin: 0px 0px; 37 | } 38 | 39 | textarea { 40 | font-size: 12px; 41 | width: 95%; 42 | height: 60%; 43 | } 44 | 45 | ul { 46 | padding: 1px 165px; 47 | font-family: 'Mukta', sans-serif; 48 | } 49 | 50 | p.side { 51 | padding: 10px 50px 0px 150px; 52 | display: inline-block; 53 | } 54 | 55 | iframe { 56 | background-color: #F5F5F5; 57 | } 58 | 59 | div.background { 60 | overflow: auto; 61 | background-color: #f9f7f4; 62 | padding: 20px 0px; 63 | width: 100%; 64 | height: 100%; 65 | } 66 | 67 | div.clientBox { 68 | background-color: #f9f7f4; 69 | float: left; 70 | width: 50%; 71 | } 72 | 73 | div.clientBoxR { 74 | background-color: #f9f7f4; 75 | float: left; 76 | width: 50%; 77 | } 78 | 79 | 80 | div.return { 81 | /*margin: 2px 0px 0px 2px;*/ 82 | border-width: 2px 0px 0px 2px; 83 | border-style: solid; 84 | border-color: #6c6c6c; 85 | margin: 0px 30px 0px 0px; 86 | overflow-y: scroll; 87 | width: 85%; 88 | height: 500px; 89 | background-color: #FFF; 90 | } 91 | /*height: 500px;*/ 92 | 93 | div#request { 94 | margin: 0px 0px 10px 150px; 95 | padding: 1px 5px; 96 | border-width: 2px 0px 0px 2px; 97 | border-style: solid; 98 | border-color: #6c6c6c; 99 | overflow: hidden; 100 | overflow-x: scroll; 101 | width: 75%; 102 | height: 50px; 103 | background-color: #F5F5F5; 104 | } 105 | 106 | img#loading { 107 | display: none; 108 | } 109 | 110 | #version { 111 | font-family: "Montserrat", sans-serif; 112 | color: grey; 113 | font-size: 20px; 114 | 115 | } 116 | 117 | .accordion { 118 | 
background-color: #eee; 119 | color: #444; 120 | cursor: pointer; 121 | padding: 18px 130px 18px 150px; 122 | width: 100%; 123 | border: none; 124 | text-align: left; 125 | outline: none; 126 | font-size: 15px; 127 | transition: 0.4s; 128 | } 129 | 130 | .active, .accordion:hover { 131 | background-color: #ccc; 132 | } 133 | 134 | .panel { 135 | padding: 0 18px; 136 | display: none; 137 | background-color: white; 138 | overflow: hidden; 139 | } 140 | -------------------------------------------------------------------------------- /service/templates/home.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NASA Concept Tagging API 5 | 6 | 7 | 10 | 11 | 12 | 13 | 47 | 48 | 49 |

50 | NASA logo 51 |  NASA Concept Tagging API (version: {{ version }}) 52 |

53 | 54 | 55 |
56 |

This service exposes a single endpoint, "{{ root }}/{{ methodname }}/", which extracts concepts from the text you pass to it. The models that support this API were trained on over 3.5 million NASA Scientific and Technical Information (STI) documents.

57 | 62 |

63 | You can access the endpoint using the interface below or by using standard web request technologies. For example, using curl: 64 |

65 |

 66 |     
67 |

68 | 69 |
70 | 71 | 87 | 88 |
89 | 90 |
91 |

Client

92 | 93 |

94 | 95 | 96 |
97 | 98 | 99 |
100 | 101 | 102 |

103 | 104 |

105 | Enter some text you want to find terms for in the box below:
106 | 107 |
108 | 109 |

110 | 111 |
112 |
113 |

114 |

Return Data  

115 | 116 |
117 |

118 |     
119 |
120 |
# 121 | 125 | 126 |
# --------------------------------------------------------------------------------
# /src/get_tag_names.py:
# --------------------------------------------------------------------------------
"""Write the raw names of all category and keyword tags found in a models directory."""
import argparse
import logging
from pathlib import Path
import json

import joblib
from tqdm import tqdm


logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)


def get_dir_cons(mdir):
    """Return the ``'concept'`` entry of every joblib file directly inside *mdir*.

    Every directory entry is passed to ``joblib.load``, so *mdir* is expected
    to contain only model files.
    """
    return [joblib.load(p)['concept'] for p in tqdm(mdir.iterdir())]


def main(model_dir, text_dir):
    """Write ``cat_list.txt`` and ``kwd_list.txt`` into *model_dir*.

    For each tag type, the lemmas stored in the model files are mapped back to
    their raw names through the inverse of ``<type>_raw2lemma.json`` and
    written one per line.

    Args:
        model_dir: Path to the directory holding ``categories/models``,
            ``keywords/models/topic_`` and the ``*_raw2lemma.json`` files.
        text_dir: currently unused; kept so existing callers keep working.

    Raises:
        KeyError: if a model's concept has no entry in the raw2lemma file.
    """
    LOG.info(f'Reading models from {model_dir}.')

    model_dirs = {
        'cat': model_dir / 'categories/models',
        'kwd': model_dir / 'keywords/models/topic_',
    }
    for tag_type, tag_dir in model_dirs.items():
        raw2lemma_loc = model_dir / f'{tag_type}_raw2lemma.json'
        with open(raw2lemma_loc, 'r') as f0:
            raw2lemma = json.load(f0)
        lemma2raw = {v: k for k, v in raw2lemma.items()}
        tags = get_dir_cons(tag_dir)
        raw_tags = [lemma2raw[c] for c in tags]
        out_p = model_dir / f"{tag_type}_list.txt"
        LOG.info(f'Writing {len(tags)} {tag_type}s to {out_p}.')
        with open(out_p, 'w') as f0:
            f0.writelines(f"{raw_tag}\n" for raw_tag in raw_tags)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description = 'Make text files of all keyword tags from models.')
    # Required: main() builds every path from model_dir, so None cannot work.
    parser.add_argument(
        '--model_dir',
        help='directory containing the models and *_raw2lemma.json files; '
             'the *_list.txt files are written here',
        type=Path,
        required=True,
    )
    parser.add_argument(
        '--text_dir',
        help='currently unused; accepted for backward compatibility',
        type=Path,
    )
    args = parser.parse_args()
    main(args.model_dir, args.text_dir)
# --------------------------------------------------------------------------------
# /tests/.gitignore:
# --------------------------------------------------------------------------------
# __pycache__/*
# .coverage
# --------------------------------------------------------------------------------
# /tests/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/nasa/concept-tagging-api/c65c217a4e97f29cbeffe350e7ffb1f4d1e688ac/tests/__init__.py
# --------------------------------------------------------------------------------
# /tests/context.py:
# --------------------------------------------------------------------------------
import os
import sys

# Make the modules under ../service/ importable by their bare names.
sys.path.insert(
    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../service/"))
)

import app
# --------------------------------------------------------------------------------
# /tests/test_app.py:
# --------------------------------------------------------------------------------
import unittest
from .context import app
from app import app, SERVICE_ROOT_PATH, FIND_TERMS_METHOD_NAME
from tempfile import TemporaryDirectory
from dsconcept.get_metrics import StubBestEstimator, HierarchicalClassifier
from sklearn.feature_extraction import DictVectorizer
import json


class MyTestCase(unittest.TestCase):
    """Endpoint tests that run against stub estimators instead of real models."""

    def setUp(self):
        """Replace the loaded classifier and lemma maps in ``app.config`` with stubs."""
        self.d = TemporaryDirectory()
        # Remove the directory explicitly instead of relying on garbage
        # collection, which would emit a ResourceWarning.
        self.addCleanup(self.d.cleanup)
        cat_clfs = [
            {"best_estimator_": StubBestEstimator(), "concept": "physics"},
            {"best_estimator_": StubBestEstimator(), "concept": "video games"},
        ]
        kwd_clfs = {
            ("physics", "gamma ray"): StubBestEstimator(),
            ("video games", "minecraft"): StubBestEstimator(),
            ("video games", "kerbal space program"): StubBestEstimator(),
            ("", "minecraft"): StubBestEstimator(),
            ("", "gamma ray"): StubBestEstimator(),
            ("", "penguins"): StubBestEstimator(),
        }
        v = DictVectorizer()
        d = [{"astronauts": 1, "astronomy": 1}, {"space": 1, "basalt": 1}]
        v.fit(d)
        app.config["HIER_CLF"] = HierarchicalClassifier(cat_clfs, kwd_clfs)
        app.config["HIER_CLF"].vectorizer = v
        app.config["CAT_RAW2LEMMA"] = {
            "PHYSICS": "physics",
            "VIDEO GAMES": "video games",
        }
        app.config["CAT_LEMMA2RAW"] = {
            v: k for k, v in app.config["CAT_RAW2LEMMA"].items()
        }
        app.config["KWD_RAW2LEMMA"] = {
            "MINECRAFT": "minecraft",
            "GAMMA RAY": "gamma ray",
            "KERBAL SPACE PROGRAM": "kerbal space program",
            "PENGUINS": "penguins",
        }
        app.config["KWD_LEMMA2RAW"] = {
            v: k for k, v in app.config["KWD_RAW2LEMMA"].items()
        }
        app.config["WEIGHTS"] = {
            "NOUN": 1,
            "NOUN_CHUNK": 1,
            "ENT": 1,
            "PROPN": 1,
            "ACRONYM": 1,
        }
        self.app = app.test_client()

    def test_home(self):
        """The landing page responds with 200."""
        response = self.app.get(f"{SERVICE_ROOT_PATH}/")
        self.assertEqual(response.status_code, 200)

    def test_find_terms(self):
        """Two input texts produce two entries in each payload list."""
        data = {
            "text": [
                "Astronauts go on space walks.",
                "Basalt rocks and minerals are on earth.",
            ],
            "probability_threshold": "0.5",
            "topic_threshold": "0.9",
            "request_id": "example_id10",
        }
        response = self.app.post(
            f"{SERVICE_ROOT_PATH}/{FIND_TERMS_METHOD_NAME}/",
            data=json.dumps(data),
            follow_redirects=True,
        )
        self.assertEqual(response.status_code, 200)
        r_data = json.loads(response.data)
        for rt in ["features", "sti_keywords", "topic_probabilities"]:
            self.assertEqual(len(r_data["payload"][rt]), 2)

    def test_find_terms_single(self):
        """A single string input produces one entry in each payload list."""
        data = {
            "text": "Astronauts go on space walks.",
            "probability_threshold": "0.5",
            "topic_threshold": "0.9",
            "request_id": "example_id10",
        }
        response = self.app.post(
            f"{SERVICE_ROOT_PATH}/{FIND_TERMS_METHOD_NAME}/",
            data=json.dumps(data),
            follow_redirects=True,
        )
        self.assertEqual(response.status_code, 200)
        r_data = json.loads(response.data)
        for rt in ["features", "sti_keywords", "topic_probabilities"]:
            self.assertEqual(len(r_data["payload"][rt]), 1)


if __name__ == "__main__":
    unittest.main()
# --------------------------------------------------------------------------------
# /version.py:
# --------------------------------------------------------------------------------
"""Print the project version, cut down to its first three dot-separated parts."""
import os
from setuptools_scm import get_version

# Ask setuptools_scm for the version, using this file's directory as the root.
_here = os.path.dirname(os.path.abspath(__file__))
_full_version = get_version(root=_here)

# Keep only the first three dot-separated components of whatever was returned.
version = ".".join(_full_version.split(".")[:3])
print(version)
# --------------------------------------------------------------------------------