├── text_analysis ├── main │ ├── __init__.py │ ├── urls.py │ ├── forms.py │ ├── views.py │ ├── static │ │ └── demo.html │ ├── tests.py │ └── mecab_utils.py ├── text_analysis │ ├── __init__.py │ ├── settings │ │ ├── test.py │ │ ├── production.py │ │ └── __init__.py │ ├── urls.py │ └── wsgi.py └── manage.py ├── requirements.txt ├── typography-icon.png ├── .coveragerc ├── newrelic.ini ├── .github └── dependabot.yml ├── tox.ini ├── NOTICE ├── uwsgi.ini ├── .gitignore ├── LICENSE ├── Dockerfile ├── .circleci └── config.yml └── README.md /text_analysis/main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /text_analysis/text_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Django==3.2.14 2 | django-cors-headers==3.13.0 3 | mecab-python3==0.7 4 | -------------------------------------------------------------------------------- /typography-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bungoume/mecab-web-api/HEAD/typography-icon.png -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | include = text_analysis/** 3 | omit = 4 | text_analysis/*/tests/** 5 | text_analysis/*/tests.py 6 | -------------------------------------------------------------------------------- /newrelic.ini: -------------------------------------------------------------------------------- 1 | [newrelic:development] 2 | app_name = Text Analysis API (development) 3 | 4 | [newrelic:production] 5 | app_name = Text Analysis API 6 | 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | time: "11:00" 8 | timezone: Asia/Tokyo 9 | open-pull-requests-limit: 10 10 | -------------------------------------------------------------------------------- /text_analysis/main/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | 3 | from main import views 4 | 5 | urlpatterns = [ 6 | url(r'^parse$', views.parse, name='parse'), 7 | url(r'^reading$', views.reading, name='reading'), 8 | ] 9 | -------------------------------------------------------------------------------- /text_analysis/text_analysis/settings/test.py: -------------------------------------------------------------------------------- 1 | from text_analysis.settings import * # NOQA 2 | 3 | 4 | DATABASES = { 5 | 'default': { 6 | 'ENGINE': 'django.db.backends.sqlite3', 7 | 'NAME': ':memory:', 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /text_analysis/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- /text_analysis/text_analysis/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import include, url 2 | 3 | urlpatterns = [ 4 | url(r'^text-analysis/v1/', include('main.urls')), 5 | url(r'^v1/', include('main.urls')), 6 
| ] 7 | 8 | handler400 = 'main.views.handler400' 9 | handler403 = 'main.views.handler403' 10 | handler404 = 'main.views.handler404' 11 | handler500 = 'main.views.handler500' 12 | -------------------------------------------------------------------------------- /text_analysis/text_analysis/settings/production.py: -------------------------------------------------------------------------------- 1 | from text_analysis.settings import * # NOQA 2 | 3 | 4 | ALLOWED_HOSTS = ['*'] 5 | 6 | DEBUG = False 7 | 8 | SESSION_COOKIE_SECURE = True 9 | CSRF_COOKIE_SECURE = True 10 | 11 | 12 | ####################### 13 | # SECURITY MIDDLEWARE # 14 | ####################### 15 | SECURE_BROWSER_XSS_FILTER = True 16 | SECURE_CONTENT_TYPE_NOSNIFF = True 17 | SECURE_HSTS_SECONDS = 31536000 18 | SECURE_SSL_REDIRECT = True 19 | -------------------------------------------------------------------------------- /text_analysis/text_analysis/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for text_analysis project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37, flake8 3 | skipsdist = True 4 | setupdir = ./text_analysis/ 5 | [testenv:py37] 6 | deps = coverage 7 | testfixtures 8 | -rrequirements.txt 9 | setenv = DJANGO_SETTINGS_MODULE = text_analysis.settings.test 10 | commands = 11 | pip install -r requirements.txt 12 | coverage erase 13 | coverage run text_analysis/manage.py test text_analysis 14 | coverage report 15 | 16 | [testenv:flake8] 17 | basepython = python3.7 18 | deps = flake8 19 | commands = flake8 text_analysis 20 | 21 | [flake8] 22 | max-line-length = 99 23 | exclude = text_analysis/*/migrations/* 24 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | List of open-source software that this project uses or depends on: 2 | 3 | MeCab 4 | https://taku910.github.io/mecab/ 5 | Copyright (c) 2001-2008, Taku Kudo 6 | Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation 7 | License: BSD (https://github.com/taku910/mecab/blob/master/mecab/BSD) 8 | 9 | Django 10 | https://www.djangoproject.com/ 11 | Copyright (c) Django Software Foundation and individual contributors.
12 | License: BSD (https://github.com/django/django/blob/master/LICENSE) 13 | 14 | mecab-python3 15 | https://github.com/SamuraiT/mecab-python3 16 | License: BSD (https://github.com/SamuraiT/mecab-python3/blob/master/BSD) 17 | -------------------------------------------------------------------------------- /uwsgi.ini: -------------------------------------------------------------------------------- 1 | [uwsgi] 2 | strict=true 3 | chdir=/usr/src/app/text_analysis/ 4 | env=DJANGO_SETTINGS_MODULE=text_analysis.settings 5 | module=text_analysis.wsgi 6 | enable-threads=true 7 | single-interpreter=true 8 | master=true 9 | vacuum=true 10 | harakiri=60 11 | processes=3 12 | threads=3 13 | die-on-term=true 14 | threads-stacksize=2048 15 | reload-on-rss=320 16 | evil-reload-on-rss=384 17 | post-buffering=8192 18 | buffer-size=32768 19 | reuse-port=true 20 | thunder-lock=true 21 | 22 | py-tracebacker=/tmp/tbsocket. 23 | req-logger=file:/log/uwsgi-access.log 24 | logger=file:/log/uwsgi.log 25 | 26 | [development] 27 | ini=:uwsgi 28 | socket=0.0.0.0:8000 29 | env=DJANGO_SETTINGS_MODULE=text_analysis.settings.development 30 | 31 | [production] 32 | ini=:uwsgi 33 | socket=0.0.0.0:8000 34 | env=DJANGO_SETTINGS_MODULE=text_analysis.settings.production 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /text_analysis/main/forms.py: -------------------------------------------------------------------------------- 1 | from django import forms 2 | from django.core import validators 3 | 4 | 5 | class ReadingForm(forms.Form): 6 | sentence = forms.CharField(required=False) 7 | nbest_num = forms.IntegerField(validators=[ 8 | validators.MinValueValidator(1), validators.MaxValueValidator(50)], required=False) 9 | 10 | def clean_sentence(self): 11 | return self.cleaned_data.get('sentence', '') 12 | 13 | def clean_nbest_num(self): 14 | nbest_num = self.cleaned_data.get('nbest_num') 15 | if nbest_num is None: 16 | return 10 17 | return nbest_num 18 | 19 | 20 | class ParseForm(forms.Form): 21 | sentence = forms.CharField(required=False) 22 | nbest_num = forms.IntegerField(validators=[ 23 | validators.MinValueValidator(1), validators.MaxValueValidator(50)], required=False) 24 | 25 | def clean_sentence(self): 26 | return self.cleaned_data.get('sentence', '') 27 | 28 | def clean_nbest_num(self): 29 | nbest_num = self.cleaned_data.get('nbest_num') 30 | if nbest_num is None: 31 | return 3 32 | return nbest_num 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Yuri UMEZAKI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.0-alpine 2 | 3 | RUN mkdir -p /usr/src/app && mkdir /log && \ 4 | apk --no-cache --update add \ 5 | build-base \ 6 | linux-headers \ 7 | openssl \ 8 | libstdc++ \ 9 | bash \ 10 | curl \ 11 | file \ 12 | git \ 13 | ca-certificates && \ 14 | cd /tmp && \ 15 | wget -O mecab-0.996.tar.gz "https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7cENtOXlicTFaRUE" && \ 16 | tar xvzf mecab-0.996.tar.gz && \ 17 | cd mecab-0.996 && \ 18 | ./configure --enable-utf8-only && \ 19 | make && make install && \ 20 | mkdir -p /usr/local/lib/mecab/dic && \ 21 | chmod 777 /usr/local/lib/mecab/dic && \ 22 | cd /tmp && \ 23 | git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git && \ 24 | cd mecab-ipadic-neologd && \ 25 | ./bin/install-mecab-ipadic-neologd -n -y && \ 26 | sed -i "s/ipadic$/mecab-ipadic-neologd/g" 
/usr/local/etc/mecabrc && \ 27 | pip install uWSGI mecab-python3==0.7 && \ 28 | apk del build-base linux-headers && \ 29 | rm -rf /tmp/* /var/tmp/* /var/cache/apk/* /root/.cache/pip/* 30 | 31 | WORKDIR /usr/src/app 32 | 33 | COPY requirements.txt /usr/src/app/ 34 | RUN pip install --no-cache-dir -r requirements.txt && \ 35 | rm -rf /tmp/* /var/tmp/* /root/.cache/pip/* 36 | 37 | COPY . /usr/src/app 38 | 39 | ENV DJANGO_SETTINGS_MODULE=text_analysis.settings.production 40 | 41 | RUN python text_analysis/manage.py collectstatic --noinput 42 | 43 | EXPOSE 8000 44 | 45 | ENV UWSGI_ENV production 46 | 47 | CMD ["uwsgi", "--ini", "uwsgi.ini:${UWSGI_ENV}"] 48 | 49 | # RUN pip install newrelic 50 | # ENV NEW_RELIC_ENVIRONMENT ${UWSGI_ENV} 51 | # ENV NEW_RELIC_LICENSE_KEY {{ YOUR_LICENSE_KEY }} 52 | # ENV NEW_RELIC_APP_NAME {{ THIS_APP_NAME }} 53 | # CMD ["newrelic-admin", "run-program", "uwsgi", "--ini", "uwsgi.ini:${UWSGI_ENV}"] 54 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | test: 4 | working_directory: ~/myapp 5 | docker: 6 | - image: circleci/python:3.7.0 7 | steps: 8 | - checkout 9 | - run: 10 | command: python -m venv env 11 | - restore_cache: 12 | keys: 13 | - v1-myapp-{{ checksum "requirements.txt" }} 14 | - restore_cache: 15 | keys: 16 | - v1-myapp-mecab-0.996 17 | - run: 18 | name: Install Mecab 19 | command: | 20 | cd ~ 21 | if [[ ! 
-e mecab-0.996/src/mecab ]]; then 22 | wget -O mecab-0.996.tar.gz "https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7cENtOXlicTFaRUE" 23 | tar xvzf mecab-0.996.tar.gz 24 | rm -f mecab-0.996.tar.gz 25 | cd mecab-0.996 26 | ./configure --enable-utf8-only 27 | make 28 | else 29 | cd mecab-0.996 30 | fi 31 | sudo make install 32 | sudo ldconfig 33 | sudo mkdir -p /usr/local/lib/mecab/dic 34 | sudo chmod 777 /usr/local/lib/mecab/dic 35 | - restore_cache: 36 | keys: 37 | - v1-myapp-mecab-ipadic-neologd-201809 # When updating neologd, change the date here and in the save_cache key. NOTE(review): the save_cache key still ends in 201804 - confirm which date is intended. 38 | - run: 39 | name: Install neologd 40 | command: | 41 | if [[ ! -e /usr/local/lib/mecab/dic/mecab-ipadic-neologd/sys.dic ]]; then 42 | cd ~ 43 | git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git 44 | cd mecab-ipadic-neologd 45 | ./bin/install-mecab-ipadic-neologd -n -y 46 | fi 47 | sudo sed -i "s/ipadic$/mecab-ipadic-neologd/g" /usr/local/etc/mecabrc 48 | - run: 49 | name: Run tox 50 | command: | 51 | . env/bin/activate 52 | pip install -U tox 53 | tox 54 | - run: 55 | name: Run coveralls 56 | command: | 57 | .
env/bin/activate 58 | pip install -U coveralls 59 | coveralls 60 | - save_cache: 61 | key: v1-myapp-{{ checksum "requirements.txt" }} 62 | paths: 63 | - ~/myapp/.tox 64 | - ~/myapp/env 65 | - save_cache: 66 | key: v1-myapp-mecab-0.996 67 | paths: 68 | - ~/mecab-0.996 69 | - save_cache: 70 | key: v1-myapp-mecab-ipadic-neologd-201804 71 | paths: 72 | - /usr/local/lib/mecab/dic/mecab-ipadic-neologd 73 | - store_artifacts: 74 | path: .circle_artifacts 75 | - store_test_results: 76 | path: .circle_test_reposts/django 77 | 78 | workflows: 79 | version: 2 80 | test: 81 | jobs: 82 | - test 83 | -------------------------------------------------------------------------------- /text_analysis/main/views.py: -------------------------------------------------------------------------------- 1 | from django.http import HttpResponse, JsonResponse 2 | 3 | from django.views.decorators.cache import cache_control 4 | from django.views.decorators.http import require_http_methods 5 | 6 | from main import mecab_utils 7 | from main.forms import ReadingForm, ParseForm 8 | 9 | 10 | @cache_control(max_age=86400) 11 | @require_http_methods(["GET", "POST", "OPTIONS"]) 12 | def reading(request): 13 | if request.method == "GET": 14 | form = ReadingForm(request.GET) 15 | elif request.method == "POST": 16 | form = ReadingForm(request.POST) 17 | elif request.method == "OPTIONS": 18 | return HttpResponse({}, status=204) 19 | if not form.is_valid(): 20 | return JsonResponse( 21 | {"error": {"code": "form_invalid", "errors": form.errors}}, status=400) 22 | 23 | sentence = form.cleaned_data.get('sentence') 24 | nbest_num = form.cleaned_data.get('nbest_num') 25 | 26 | ret = { 27 | 'input_sentence': sentence, 28 | 'items': mecab_utils.reading_sentence(sentence, nbest_num), 29 | } 30 | 31 | return JsonResponse(ret, json_dumps_params={'ensure_ascii': False, 'separators': (',', ':')}) 32 | 33 | 34 | @cache_control(max_age=86400) 35 | @require_http_methods(["GET", "POST", "OPTIONS"]) 36 | def parse(request): 
37 | if request.method == "GET": 38 | form = ParseForm(request.GET) 39 | elif request.method == "POST": 40 | form = ParseForm(request.POST) 41 | elif request.method == "OPTIONS": 42 | return HttpResponse("", status=204) 43 | if not form.is_valid(): 44 | return JsonResponse( 45 | {"error": {"code": "form_invalid", "errors": form.errors}}, status=400) 46 | 47 | sentence = form.cleaned_data.get('sentence') 48 | nbest_num = form.cleaned_data.get('nbest_num') 49 | 50 | ret = { 51 | 'input_sentence': sentence, 52 | 'items': mecab_utils.parse_sentence(sentence, nbest_num), 53 | } 54 | 55 | return JsonResponse(ret, json_dumps_params={'ensure_ascii': False, 'separators': (',', ':')}) 56 | 57 | 58 | def handler400(request, exception): 59 | res = JsonResponse({'error': {'code': 'bad_request', 60 | 'message': "400 Bad Request"}}, status=400) 61 | return res 62 | 63 | 64 | def handler403(request, exception): 65 | res = JsonResponse({'error': {'code': 'permission_denied', 66 | 'message': "403 Permission Denied"}}, status=403) 67 | return res 68 | 69 | 70 | def handler404(request, exception): 71 | res = JsonResponse({'error': {'code': 'not_found', 72 | 'message': "404 Not Found"}}, status=404) 73 | return res 74 | 75 | 76 | def handler500(request): 77 | res = JsonResponse({'error': {'code': 'server_error', 78 | 'message': "500 Internal Server Error"}}, status=500) 79 | return res 80 | -------------------------------------------------------------------------------- /text_analysis/main/static/demo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |