├── simple_chatbot ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ └── simple_chatbot_initial.py ├── migrations │ ├── __init__.py │ ├── 0003_auto_20210501_1540.py │ ├── 0002_auto_20210501_1537.py │ ├── 0005_auto_20210502_1535.py │ ├── 0004_usermessageinput.py │ └── 0001_initial.py ├── __init__.py ├── apps.py ├── responses.py ├── tokenizer.py ├── serializers.py ├── admin.py ├── signals.py ├── utils.py ├── views.py ├── settings.py └── models.py ├── MANIFEST.in ├── setup.cfg ├── tests ├── __init__.py ├── test_tokenizer.py └── settings.py ├── .gitignore ├── setup.py ├── LICENSE └── README.md /simple_chatbot/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /simple_chatbot/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /simple_chatbot/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | long_description = file: README.md 3 | long_description_content_type = text/markdown -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | import os 3 | 4 | nltk.download('punkt') 5 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tests.settings') 6 | -------------------------------------------------------------------------------- /simple_chatbot/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 0, 9) 2 | __version__ = ".".join(str(i) for i in VERSION) 3 | 4 | default_app_config = "simple_chatbot.apps.SimpleChatbotConfig" 5 | -------------------------------------------------------------------------------- /simple_chatbot/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class SimpleChatbotConfig(AppConfig): 5 | default_auto_field = 'django.db.models.BigAutoField' 6 | name = 'simple_chatbot' 7 | 8 | def ready(self): 9 | from simple_chatbot import signals 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | .DS_Store 4 | 5 | venv/ 6 | env/ 7 | virtualenv/ 8 | 9 | .vscode/ 10 | .idea/ 11 | 12 | *.sqlite3 13 | *.log 14 | local_settings.py 15 | 16 | # dummy django stuff 17 | django_base 18 | manage.py 19 | 20 | build/ 21 | dist/ 22 | *.egg-info 23 | .pypirc -------------------------------------------------------------------------------- /simple_chatbot/responses.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from random import choice 3 | 4 | 5 | class BaseResponse(ABC): 6 | @abstractmethod 7 | def get_response(self): 8 | pass 9 | 10 | 11 | class GenericRandomResponse(BaseResponse): 12 | choices = () 13 | 14 | def get_response(self): 15 | return choice(self.choices) 16 | -------------------------------------------------------------------------------- /simple_chatbot/tokenizer.py: -------------------------------------------------------------------------------- 1 | import re 2 | from nltk import word_tokenize 3 | from . import settings 4 | 5 | 6 | def get_tokens_from_pattern(pattern): 7 | alphanumerical_pattern = re.sub(r'[^a-zA-Z0-9 ]', '', pattern).lower() 8 | words = word_tokenize(alphanumerical_pattern) 9 | if settings.MODULES["STEMMER"]: 10 | words = [settings.MODULES["STEMMER"].stem(word) for word in words] 11 | return words 12 | -------------------------------------------------------------------------------- /simple_chatbot/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | from .models import UserMessageInput 3 | 4 | 5 | class ChatResponseSerializer(serializers.Serializer): 6 | tag = serializers.CharField(read_only=True) 7 | message = serializers.CharField(read_only=True) 8 | 9 | 10 | class UserMessageInputSerializer(serializers.ModelSerializer): 11 | class Meta: 12 | model = UserMessageInput 13 | fields = ('message', 'identified_tag', ) 14 | extra_kwargs = {'identified_tag': {'required': False}} 15 | 16 | def save(self, identified_tag, **kwargs): 17 | data = {"identified_tag": identified_tag, **kwargs} 18 | return super().save(**data) 19 | -------------------------------------------------------------------------------- /simple_chatbot/migrations/0003_auto_20210501_1540.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2 on 2021-05-01 15:40 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('simple_chatbot', '0002_auto_20210501_1537'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name='tag', 15 | name='name', 16 | ), 17 | migrations.AlterField( 18 | model_name='tag', 19 | name='method', 20 | field=models.CharField(choices=[('simple_chatbot.responses.RecomendationResponse', 'Recomendation'), ('simple_chatbot.responses.GreetingResponse', 'Greeting')], max_length=120, unique=True, verbose_name='Method'), 21 | ), 22 | ] -------------------------------------------------------------------------------- /simple_chatbot/management/commands/simple_chatbot_initial.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from django.core.management.base import BaseCommand 3 | from simple_chatbot.models import Pattern 4 | 5 | logging.basicConfig() 6 | logging.getLogger().setLevel(logging.INFO) 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class Command(BaseCommand): 11 | def handle(self, *args, **options): 12 | logger.info("Initializing of default patterns started.") 13 | patterns = [ 14 | "Hi, how are you?", 15 | "Is anyone there?", 16 | "Hello", 17 | "What's up?!", 18 | "hey there!", 19 | "Bye", 20 | "See you later", 21 | "Goodbye", 22 | "I need to go now.", 23 | ] 24 | for pattern in patterns: 25 | Pattern.objects.get_or_create(string=pattern) 26 | logger.info(f">> {pattern}") 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) 5 | 6 | setup( 7 | name="django-simple-chatbot", 8 | version="0.0.9", 9 | author="Janga", 10 | license='MIT License', 11 | author_email="jangascodingplace@gmail.com", 12 | description="A very basic Django Chatbot ft. NLTK and DRF", 13 | url="https://github.com/Codingplace42/django-simple-chatbot", 14 | packages=find_packages(), 15 | classifiers=[ 16 | "Environment :: Web Environment", 17 | "Framework :: Django", 18 | "Intended Audience :: Developers", 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | ], 23 | install_requires=[ 24 | 'django>=2.2', 25 | 'nltk', 26 | 'djangorestframework' 27 | ] 28 | ) 29 | -------------------------------------------------------------------------------- /simple_chatbot/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from .models import Token, Tag, Pattern, UserMessageInput 3 | 4 | 5 | @admin.register(Token) 6 | class TokenAdmin(admin.ModelAdmin): 7 | readonly_fields = ("token", "patterns", ) 8 | 9 | def has_add_permission(self, request): 10 | return False 11 | 12 | 13 | class PatternInline(admin.StackedInline): 14 | model = Pattern 15 | 16 | 17 | @admin.register(Tag) 18 | class TagAdmin(admin.ModelAdmin): 19 | list_display = ("method", ) 20 | inlines = (PatternInline, ) 21 | 22 | 23 | @admin.register(UserMessageInput) 24 | class UserMessageInputAdmin(admin.ModelAdmin): 25 | list_display = ('__str__', 'status', 'identified_tag', 'timestamp', 'correct_tag', ) 26 | list_editable = ('status', 'correct_tag', ) 27 | list_filter = ('status', ) 28 | 29 | def has_add_permission(self, request): 30 | return False 31 | 32 | 33 | @admin.register(Pattern) 34 | class PatternAdmin(admin.ModelAdmin): 35 | list_display = ("__str__", "tag", ) 36 | list_filter = ("tag", ) 37 | readonly_fields = ("tokenized_string", ) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Janga 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /simple_chatbot/migrations/0002_auto_20210501_1537.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2 on 2021-05-01 15:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('simple_chatbot', '0001_initial'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='tag', 15 | name='method', 16 | field=models.CharField(blank=True, choices=[('Recomendation', 'simple_chatbot.responses.RecomendationResponse'), ('Greeting', 'simple_chatbot.responses.GreetingResponse')], max_length=120, verbose_name='Method'), 17 | ), 18 | migrations.AlterField( 19 | model_name='token', 20 | name='patterns', 21 | field=models.ManyToManyField(blank=True, editable=False, null=True, related_name='tokens', to='simple_chatbot.Pattern'), 22 | ), 23 | migrations.AlterField( 24 | model_name='token', 25 | name='token', 26 | field=models.CharField(db_index=True, editable=False, max_length=40, unique=True, verbose_name='token'), 27 | ), 28 | ] -------------------------------------------------------------------------------- /simple_chatbot/migrations/0005_auto_20210502_1535.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2 on 2021-05-02 15:35 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('simple_chatbot', '0004_usermessageinput'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='pattern', 15 | options={'verbose_name': 'pattern', 'verbose_name_plural': 'patterns'}, 16 | ), 17 | migrations.AlterModelOptions( 18 | name='usermessageinput', 19 | options={'ordering': ('-timestamp',), 'verbose_name': 'user message input', 'verbose_name_plural': 'user message inputs'}, 20 | ), 21 | migrations.AlterField( 22 | model_name='tag', 23 | name='method', 24 | field=models.CharField(choices=[], max_length=120, unique=True, verbose_name='Method'), 25 | ), 26 | migrations.AlterField( 27 | model_name='token', 28 | name='patterns', 29 | field=models.ManyToManyField(blank=True, editable=False, related_name='tokens', to='simple_chatbot.Pattern'), 30 | ), 31 | ] 32 | -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from simple_chatbot import tokenizer, settings 3 | from nltk.stem.lancaster import LancasterStemmer 4 | 5 | 6 | @pytest.fixture 7 | def settings_(): 8 | return { 9 | "MODULES": { 10 | "STEMMER": LancasterStemmer() 11 | } 12 | } 13 | 14 | 15 | class TestTokenizer: 16 | def test_get_tokens_from_pattern_with_stemmer(self, settings_): 17 | settings.MODULES = settings_["MODULES"] 18 | pattern = "Hello world from Chatbot! Let's have some fun ... " 19 | expected_tokens = ['hello', 'world', 'from', 'chatbot', 'let', 'hav', 'som', 'fun'] 20 | tokens = tokenizer.get_tokens_from_pattern(pattern) 21 | assert len(expected_tokens) == len(tokens) 22 | assert all([a == b for a, b in zip(expected_tokens, tokens)]) 23 | 24 | def test_get_tokens_from_pattern_without_stemmer(self): 25 | settings.MODULES["STEMMER"] = None 26 | pattern = "Hello world from Chatbot! Let's have some fun ... " 27 | expected_tokens = ['hello', 'world', 'from', 'chatbot', 'lets', 'have', 'some', 'fun'] 28 | tokens = tokenizer.get_tokens_from_pattern(pattern) 29 | assert len(expected_tokens) == len(tokens) 30 | assert all([a == b for a, b in zip(expected_tokens, tokens)]) 31 | -------------------------------------------------------------------------------- /simple_chatbot/migrations/0004_usermessageinput.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2 on 2021-05-02 00:26 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('simple_chatbot', '0003_auto_20210501_1540'), 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name='UserMessageInput', 16 | fields=[ 17 | ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 18 | ('message', models.CharField(max_length=1024, verbose_name='message')), 19 | ('status', models.BooleanField(blank=True, help_text='Message evaluation right or wrong?', null=True, verbose_name='Status')), 20 | ('timestamp', models.DateTimeField(auto_now_add=True, verbose_name='timestamp')), 21 | ('correct_tag', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='usermessageinput_corrected', to='simple_chatbot.tag', verbose_name='correct tag')), 22 | ('identified_tag', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='usermessageinput_identified', to='simple_chatbot.tag', verbose_name='identified tag')), 23 | ], 24 | ), 25 | ] -------------------------------------------------------------------------------- /simple_chatbot/signals.py: -------------------------------------------------------------------------------- 1 | from django.dispatch import receiver 2 | from django.db.models.signals import post_save, pre_save 3 | from simple_chatbot.models import Pattern, Token, UserMessageInput 4 | 5 | 6 | @receiver(post_save, sender=Pattern) 7 | def create_tokens(instance, *args, **kwargs): 8 | tokens = list(set(instance.tokenized_string.split())) 9 | for token in tokens: 10 | token_instance, _ = Token.objects.get_or_create(token=token) 11 | token_instance.patterns.add(instance) 12 | token_instance.save() 13 | 14 | 15 | @receiver(pre_save, sender=Pattern) 16 | def clear_tokens_on_change(instance, *args, **kwargs): 17 | if not instance.pk: 18 | return 19 | pattern = Pattern.objects.get(id=instance.id) 20 | pattern.tokens.clear() 21 | 22 | 23 | @receiver(post_save, sender=UserMessageInput) 24 | def cp_user_message_input_to_pattern(instance, created, *args, **kwargs): 25 | if created: 26 | return 27 | if instance.status and instance.correct_tag != instance.identified_tag: 28 | instance.correct_tag = instance.identified_tag 29 | instance.save() 30 | return 31 | elif instance.correct_tag == instance.identified_tag and not instance.status: 32 | instance.status = True 33 | instance.save() 34 | return 35 | elif not instance.correct_tag: 36 | return 37 | else: 38 | Pattern.objects.get_or_create(string=instance.message, tag=instance.correct_tag) 39 | -------------------------------------------------------------------------------- /tests/settings.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Build paths inside the project like this: BASE_DIR / 'subdir'. 4 | BASE_DIR = Path(__file__).resolve().parent.parent 5 | 6 | 7 | # Application definition 8 | 9 | INSTALLED_APPS = [ 10 | 'simple_chatbot', 11 | 'rest_framework', 12 | ] 13 | 14 | MIDDLEWARE = [ 15 | 'django.middleware.security.SecurityMiddleware', 16 | 'django.contrib.sessions.middleware.SessionMiddleware', 17 | 'django.middleware.common.CommonMiddleware', 18 | 'django.middleware.csrf.CsrfViewMiddleware', 19 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 20 | 'django.contrib.messages.middleware.MessageMiddleware', 21 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 22 | ] 23 | 24 | ROOT_URLCONF = 'django_base.urls' 25 | 26 | 27 | WSGI_APPLICATION = 'django_base.wsgi.application' 28 | 29 | 30 | # Database 31 | # https://docs.djangoproject.com/en/3.2/ref/settings/#databases 32 | 33 | DATABASES = { 34 | 'default': { 35 | 'ENGINE': 'django.db.backends.sqlite3', 36 | 'NAME': BASE_DIR / 'db.sqlite3', 37 | } 38 | } 39 | 40 | # Default primary key field type 41 | # https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field 42 | 43 | DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' 44 | 45 | 46 | SIMPLE_CHATBOT = { 47 | "responses": ( 48 | ("simple_chatbot.responses.RecomendationResponse", "Recomendation"), 49 | ("simple_chatbot.responses.GreetingResponse", "Greeting"), 50 | ), 51 | } 52 | -------------------------------------------------------------------------------- /simple_chatbot/utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | 4 | class PatternMatcher: 5 | def __init__(self, tokens, patterns): 6 | self.tokens = tokens 7 | self.patterns = patterns 8 | 9 | @staticmethod 10 | def get_token_match_ratio(input_token_list, sample_token_list): 11 | matches = [token for token in input_token_list if token in sample_token_list] 12 | return (len(matches), len(input_token_list)) 13 | 14 | def get_pattern_statistics(self): 15 | statistics = dict() 16 | for pattern in self.patterns: 17 | match_ratio = self.get_token_match_ratio( 18 | input_token_list=self.tokens, 19 | sample_token_list=pattern.tokenized_string.split() 20 | ) 21 | abs_ = abs(match_ratio[0] - match_ratio[1]) 22 | if abs_ in statistics: 23 | statistics[abs_].append(pattern) 24 | else: 25 | statistics[abs_] = [pattern] 26 | return statistics 27 | 28 | def get_best_matched_tag(self): 29 | # Statistics: 30 | # {abs_1: [pattern_11, ..., pattern_1n], 31 | # abs_2: [pattern_21, ..., pattern_2n]} 32 | # Pattern must not be unique in abs 33 | 34 | statistics = self.get_pattern_statistics() 35 | abs_list = list(statistics.keys()) 36 | min_abs = min(abs_list) 37 | tags = [pattern.tag for pattern in statistics[min_abs]] 38 | tag_counter = dict(Counter(tags)) 39 | return max(tag_counter, key=tag_counter.get) 40 | -------------------------------------------------------------------------------- /simple_chatbot/views.py: -------------------------------------------------------------------------------- 1 | from rest_framework.response import Response 2 | from rest_framework.generics import GenericAPIView 3 | from .models import Pattern, Tag 4 | from .serializers import ChatResponseSerializer, UserMessageInputSerializer 5 | 6 | 7 | class SimpleChatbot(GenericAPIView): 8 | save_pattern = True 9 | queryset = Pattern.objects 10 | serializer_class = ChatResponseSerializer 11 | 12 | def perform_create(self, serializer, identified_tag): 13 | serializer.save(identified_tag=identified_tag) 14 | 15 | def get_tag(self, string): 16 | return Tag.objects.get_tag_by_string(string) 17 | 18 | def get_response_module(self, tag): 19 | module = tag.method.split(".") 20 | mod = __import__(".".join(module[:-1]), fromlist=module[-1]) 21 | return getattr(mod, module[-1]) 22 | 23 | def evaluate_message(self, request, *args, **kwargs): 24 | message_serializer = UserMessageInputSerializer(data=request.data) 25 | message_serializer.is_valid(raise_exception=True) 26 | tag = self.get_tag(request.data["message"]) 27 | klass = self.get_response_module(tag)() 28 | response = klass.get_response() 29 | if self.save_pattern: 30 | self.perform_create(message_serializer, tag) 31 | data = {"tag": tag.get_method_display(), "message": response} 32 | serializer = self.get_serializer(data) 33 | return Response(serializer.data) 34 | 35 | def post(self, request, *args, **kwargs): 36 | return self.evaluate_message(request, *args, **kwargs) 37 | -------------------------------------------------------------------------------- /simple_chatbot/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2 on 2021-04-30 21:14 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | initial = True 10 | 11 | dependencies = [ 12 | ] 13 | 14 | operations = [ 15 | migrations.CreateModel( 16 | name='Pattern', 17 | fields=[ 18 | ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 19 | ('string', models.CharField(max_length=1024, verbose_name='string')), 20 | ('tokenized_string', models.CharField(editable=False, max_length=1024, verbose_name='tokenized string')), 21 | ], 22 | ), 23 | migrations.CreateModel( 24 | name='Tag', 25 | fields=[ 26 | ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 27 | ('name', models.CharField(max_length=120, verbose_name='name')), 28 | ('description', models.TextField(blank=True, verbose_name='description')), 29 | ], 30 | options={ 31 | 'verbose_name': 'tag', 32 | 'verbose_name_plural': 'tags', 33 | }, 34 | ), 35 | migrations.CreateModel( 36 | name='Token', 37 | fields=[ 38 | ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 39 | ('token', models.CharField(db_index=True, max_length=40, unique=True, verbose_name='token')), 40 | ('patterns', models.ManyToManyField(blank=True, null=True, related_name='tokens', to='simple_chatbot.Pattern')), 41 | ], 42 | options={ 43 | 'verbose_name': 'token', 44 | 'verbose_name_plural': 'tokens', 45 | }, 46 | ), 47 | migrations.AddField( 48 | model_name='pattern', 49 | name='tag', 50 | field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='patterns', to='simple_chatbot.tag', verbose_name='tag'), 51 | ), 52 | ] -------------------------------------------------------------------------------- /simple_chatbot/settings.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from django.conf import settings 3 | from django.test.signals import setting_changed 4 | 5 | nltk.download('punkt') 6 | 7 | 8 | DEFAULTS = { 9 | 'STEMMER_MODULE': 'nltk.stem.lancaster.LancasterStemmer', 10 | "responses": () 11 | } 12 | 13 | MODULES = { 14 | "STEMMER": None 15 | } 16 | 17 | IMPORT_STRINGS = [ 18 | 'stemmer', 19 | ] 20 | 21 | 22 | class SimpleChatbotSettings: 23 | def __init__(self, user_settings=None, defaults=None, modules=None): 24 | if user_settings: 25 | self._user_settings = self.__check_user_settings(user_settings) 26 | self.defaults = defaults or DEFAULTS 27 | self.modules = modules or MODULES 28 | self.set_stemmer() 29 | self._cached_attrs = set() 30 | 31 | def set_stemmer(self): 32 | module = self.user_settings.get('STEMMER_MODULE') 33 | if not module: 34 | module = self.defaults['STEMMER_MODULE'] 35 | 36 | if module: 37 | module_name = ".".join(module.split(".")[:-1]) 38 | class_name = module.split(".")[-1] 39 | exec(f"from {module_name} import {class_name}") 40 | exec(f"self.modules['STEMMER'] = {class_name}()") 41 | else: 42 | self.modules['STEMMER'] = None 43 | 44 | def __check_user_settings(self, user_settings): 45 | return user_settings 46 | 47 | @property 48 | def user_settings(self): 49 | if not hasattr(self, '_user_settings'): 50 | self._user_settings = getattr(settings, 'SIMPLE_CHATBOT', {}) 51 | return self._user_settings 52 | 53 | def __getattr__(self, attr): 54 | if attr not in self.defaults: 55 | raise AttributeError("Invalid API setting: '%s'" % attr) 56 | 57 | try: 58 | # Check if present in user settings 59 | val = self.user_settings[attr] 60 | except KeyError: 61 | # Fall back to defaults 62 | val = self.defaults[attr] 63 | 64 | # Cache the result 65 | self._cached_attrs.add(attr) 66 | setattr(self, attr, val) 67 | return val 68 | 69 | def reload(self): 70 | for attr in self._cached_attrs: 71 | delattr(self, attr) 72 | self._cached_attrs.clear() 73 | if hasattr(self, '_user_settings'): 74 | delattr(self, '_user_settings') 75 | 76 | 77 | simple_chatbot_settings = SimpleChatbotSettings(None, DEFAULTS, MODULES) 78 | 79 | 80 | def reload_simple_chatbot_settings(*args, **kwargs): 81 | setting = kwargs['setting'] 82 | if setting == 'SIMPLE_CHATBOT': 83 | simple_chatbot_settings.reload() 84 | 85 | 86 | setting_changed.connect(reload_simple_chatbot_settings) 87 | -------------------------------------------------------------------------------- /simple_chatbot/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.utils.translation import gettext_lazy as _ 3 | from .tokenizer import get_tokens_from_pattern 4 | from .utils import PatternMatcher 5 | from .settings import simple_chatbot_settings 6 | 7 | 8 | class Token(models.Model): 9 | token = models.CharField( 10 | verbose_name=_("token"), 11 | max_length=40, 12 | db_index=True, 13 | unique=True, 14 | editable=False 15 | ) 16 | patterns = models.ManyToManyField( 17 | to="Pattern", 18 | related_name="%(class)ss", 19 | blank=True, 20 | editable=False 21 | ) 22 | 23 | class Meta: 24 | verbose_name = _("token") 25 | verbose_name_plural = _("tokens") 26 | app_label = "simple_chatbot" 27 | 28 | def save(self, *args, **kwargs): 29 | self.token = self.token.lower() 30 | super().save(*args, **kwargs) 31 | 32 | def __str__(self): 33 | return self.token 34 | 35 | 36 | class TagManager(models.Manager): 37 | def get_tag_by_string(self, string): 38 | tokens = get_tokens_from_pattern(string) 39 | token_instances = Token.objects.filter(token__in=tokens) 40 | pattern_pks = token_instances.values_list("patterns") 41 | patterns = Pattern.objects.filter(id__in=pattern_pks) 42 | pattern_matcher = PatternMatcher(tokens=tokens, patterns=patterns) 43 | return pattern_matcher.get_best_matched_tag() 44 | 45 | 46 | class Tag(models.Model): 47 | method = models.CharField( 48 | verbose_name=_("Method"), 49 | unique=True, 50 | choices=simple_chatbot_settings.responses, 51 | max_length=120 52 | ) 53 | description = models.TextField( 54 | verbose_name=_("description"), 55 | blank=True 56 | ) 57 | 58 | objects = TagManager() 59 | 60 | class Meta: 61 | verbose_name = _("tag") 62 | verbose_name_plural = _("tags") 63 | app_label = "simple_chatbot" 64 | 65 | def __str__(self): 66 | return self.get_method_display() 67 | 68 | 69 | class Pattern(models.Model): 70 | string = models.CharField( 71 | verbose_name=_("string"), 72 | max_length=1024 73 | ) 74 | tokenized_string = models.CharField( 75 | verbose_name=_("tokenized string"), 76 | max_length=1024, 77 | editable=False 78 | ) 79 | tag = models.ForeignKey( 80 | to=Tag, 81 | verbose_name=_("tag"), 82 | related_name="%(class)ss", 83 | blank=True, 84 | null=True, 85 | on_delete=models.SET_NULL 86 | ) 87 | 88 | class Meta: 89 | verbose_name = _("pattern") 90 | verbose_name_plural = _("patterns") 91 | app_label = "simple_chatbot" 92 | 93 | def __str__(self): 94 | if len(self.string) < 63: 95 | return self.string 96 | return f"{self.string[:60]}..." 97 | 98 | def save(self, *args, **kwargs): 99 | self.tokenized_string = " ".join(get_tokens_from_pattern(self.string)) 100 | super().save(*args, **kwargs) 101 | 102 | 103 | class UserMessageInput(models.Model): 104 | message = models.CharField( 105 | verbose_name=_("message"), 106 | max_length=1024 107 | ) 108 | identified_tag = models.ForeignKey( 109 | to=Tag, 110 | verbose_name=_("identified tag"), 111 | related_name="%(class)s_identified", 112 | blank=True, 113 | null=True, 114 | on_delete=models.SET_NULL 115 | ) 116 | status = models.BooleanField( 117 | verbose_name=_("Status"), 118 | blank=True, 119 | null=True, 120 | help_text="Message evaluation right or wrong?" 121 | ) 122 | timestamp = models.DateTimeField( 123 | verbose_name=_("timestamp"), 124 | auto_now_add=True 125 | ) 126 | correct_tag = models.ForeignKey( 127 | to=Tag, 128 | verbose_name=_("correct tag"), 129 | related_name="%(class)s_corrected", 130 | blank=True, 131 | null=True, 132 | on_delete=models.SET_NULL 133 | ) 134 | 135 | class Meta: 136 | verbose_name = _("user message input") 137 | verbose_name_plural = _("user message inputs") 138 | ordering = ('-timestamp', ) 139 | app_label = "simple_chatbot" 140 | 141 | def __str__(self): 142 | if len(self.message) < 63: 143 | return self.message 144 | return f"{self.message[:60]}..." 145 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Django Simple Chatbot 2 | 3 | It's a very basic Chatbot for Python Django including NLTK and 4 | Django-REST-framework. This Chatbot is currently working without 5 | Machine learning algorithms. Decisions are made by simple statistic evaluation. 6 | 7 | The Algorithm is based on labeled data on your Django Database and the tool 8 | is supporting continuous labeling. 9 | 10 | ## Requirements 11 | - Python (3.7, 3.8, 3.9) 12 | - Django (2.2, 3.0, 3.1, 3.2) 13 | 14 | ## Dependencies 15 | - [Django REST-Framework - Awesome web-browsable Web APIs.](https://www.django-rest-framework.org) 16 | - [NLTK - the Natural Language Toolkit](https://www.nltk.org) 17 | 18 | ## Installation 19 | 20 | Install using `pip` ... 21 | 22 | ``` 23 | pip install django-simple-chatbot 24 | ``` 25 | 26 | add `simple_chatbot` to your `INSTALLED_APPS` setting. 27 | 28 | ``` 29 | INSTALLED_APPS = [ 30 | ..., 31 | 'simple_chatbot' 32 | ] 33 | ``` 34 | 35 | **Note:** Make sure to run `manage.py migrate` after changing your settings. 36 | The simple_chatbot app provides Django database migrations. 37 | 38 | ## Quickstart 39 | 40 | Create a `response.py` file inside of an already installed app. 41 | ``` 42 | from simple_chatbot.responses import GenericRandomResponse 43 | 44 | 45 | class GreetingResponse(GenericRandomResponse): 46 | choices = ("Hey, how can I help you?", 47 | "Hey friend. How are you? How can I help you?") 48 | 49 | 50 | class GoodbyeResponse(GenericRandomResponse): 51 | choices = ("See you later.", 52 | "Thanks for visiting.", 53 | "See ya! Have a nice day.") 54 | ``` 55 | 56 | Add this Response to your `SIMPLE_CHATBOT` setting 57 | ``` 58 | SIMPLE_CHATBOT = { 59 | ... 60 | 'responses': ( 61 | ("YOUR_APP.responses.GreetingResponse", "Greeting"), 62 | ("YOUR_APP.responses.GoodbyeResponse", "Goodbye"), 63 | ), 64 | } 65 | ``` 66 | 67 | Go to your Django admin and create `greeting` and `goodbye` tags. 68 | Your response options will be selectable via choices. 69 | 70 | Go to your Django admin, write some patterns and label them. You can just use 71 | the following labels: 72 | ``` 73 | [Greeting] 74 | "Hi, how are you?", "Is anyone there?", "Hello", "What's up?!", "hey there!" 75 | 76 | ["Goodbye"] 77 | "Bye", "See you later", "Goodbye", "I need to go now." 78 | ``` 79 | **Note** If you do not want to write that patterns by yourself, use a command 80 | `manage.py simple_chatbot_initial`. You need to label them after initializing. 81 | 82 | The package will automatically tokenize the input and map tokens to labels. 83 | 84 | Add simple_chatbot url to your routings: 85 | ``` 86 | from simple_chatbot.views import SimpleChatbot 87 | 88 | urlpatterns = [ 89 | ... 90 | path("simple_chatbot/", SimpleChatbot.as_view()) 91 | ] 92 | ``` 93 | 94 | Make a Post request to your new endpoint: 95 | ``` 96 | curl \ 97 | -H "Content-Type: application/json" \ 98 | --data '{"message":"how r u?"}' \ 99 | http://localhost:8000/simple_chatbot/ 100 | ``` 101 | 102 | The response should look like 103 | ``` 104 | { 105 | "tag": "Greeting", 106 | "message": "Hey, how can I help you?" 107 | } 108 | ``` 109 | 110 | ## Raw Documentation 111 | 112 | ### Database Models 113 | - `Pattern` - message which might be send by a user. Add a tag to the pattern 114 | for being able to identify and response to that message 115 | - `Tag` - includes information about Response class for a specific method 116 | - `Token` - tokenized words which are referencing to different patterns. The 117 | user-input will be identified by different tokens. 118 | - `UserMessageInput` - new inputs from production. It contains information 119 | about chosen pattern. You can label that messages later and include them 120 | into the system. 121 | 122 | ### settings options 123 | You can add following options to your `SIMPLE_CHATBOT` setting: 124 | - STEMMER_MODULE: nltk package for stumming your strings - 125 | default: `nltk.stem.lancaster.LancasterStemmer`. 126 | - responses: choices for your tags. It should reference to a response class. 127 | **Warning** You won't be able to create tags without response classes. 128 | 129 | ### Response Classes 130 | The `simple_chatbot.responses` package provides currently following response classes: 131 | - BaseResponse 132 | - GenericRandomResponse 133 | 134 | #### BaseResponse 135 | It's just an abstract class for require a specific shape of your response 136 | classes. If you are creating a new response, you should inheritance from that 137 | class. 138 | 139 | #### GenericRandomResponse 140 | It will choose a generic answer from class property `choices`. 141 | 142 | ### Views 143 | This `simple_chatbot.responses` includes a single view `SimpleChatbot`. 144 | This view is reusable. The most important changeble option: 145 | - `save_pattern`: if True each message will be saved and you can post label 146 | the incoming messages. default `True`. 147 | 148 | #### SimpleChatbot API Documentation 149 | - Required Request type: `POST` 150 | - payload: `{message: "YOUR MESSAGE"}` 151 | - response: `{tag: 'TAG', message: 'RESPONSE'}` 152 | 153 | ## About 154 | It's a very basic Chatbot decisions are made with tools by NLTK which follows 155 | basic preprocessing for NLP of tokenization and stumming. 156 | 157 | This package is inspired by the Chatbot Tutorial of Tech witch Tim. 158 | Checkout his blog: https://www.techwithtim.net/tutorials/ai-chatbot/ 159 | 160 | In my opinion the used script is teaching important concepts but it's 161 | overtooled by using Deep-Learning algorithm on that small amount of data. 162 | 163 | Real NLP's and Deep Learning algorithms needs a large amount of data. One 164 | problem in smaller and beginning projects: You won't have that amount of data 165 | by starting your projects. 166 | 167 | **This package gives you possibilities to work with a small amount of data and 168 | it helps you to collect new data for being able to use deep learning 169 | algorithms one day.** 170 | 171 | ### Contributing 172 | Fork the repo and get stuck in! 173 | --------------------------------------------------------------------------------