├── full_text_search ├── web │ ├── __init__.py │ ├── migrations │ │ ├── __init__.py │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20190525_0647.py │ │ └── 0003_create_text_search_trigger.py │ ├── tests.py │ ├── admin.py │ ├── views.py │ ├── apps.py │ ├── models.py │ └── index_wikipedia.py ├── full_text_search │ ├── __init__.py │ ├── wsgi.py │ ├── urls.py │ └── settings.py ├── docker-compose.yml └── manage.py ├── README.md └── .gitignore /full_text_search/web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /full_text_search/web/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /full_text_search/full_text_search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /full_text_search/web/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /full_text_search/web/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /full_text_search/web/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | # Create your views here. 
4 | -------------------------------------------------------------------------------- /full_text_search/web/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class WebConfig(AppConfig): 5 | name = 'web' 6 | -------------------------------------------------------------------------------- /full_text_search/docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '2.4' 3 | services: 4 | postgres: 5 | image: postgres:11-alpine 6 | ports: 7 | - '5432:5432' 8 | environment: 9 | # Set the Postgres environment variables for bootstrapping the default 10 | # database and user. 11 | POSTGRES_DB: "my_db" 12 | POSTGRES_USER: "me" 13 | POSTGRES_PASSWORD: "password" 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | # Full text search with Django and Postgres 6 | 7 | This repository contains example code for configuring and optimizing Django full 8 | text search with Postgres. 9 | 10 | A detailed how-to guide can be found [here](https://findwork.dev/blog/optimizing-postgres-full-text-search-in-django/). 11 | -------------------------------------------------------------------------------- /full_text_search/web/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.contrib.postgres.search import SearchVectorField 3 | from django.contrib.postgres.indexes import GinIndex 4 | 5 | class Page(models.Model): 6 | title = models.CharField(max_length=100, unique=True) 7 | content = models.TextField() 8 | 9 | # New modifications. A field and an index 10 | content_search = SearchVectorField(null=True) 11 | 12 | class Meta: 13 | indexes = [GinIndex(fields=["content_search"])] 14 | -------------------------------------------------------------------------------- /full_text_search/full_text_search/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for full_text_search project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'full_text_search.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /full_text_search/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == '__main__': 6 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'full_text_search.settings') 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError as exc: 10 | raise ImportError( 11 | "Couldn't import Django. Are you sure it's installed and " 12 | "available on your PYTHONPATH environment variable? Did you " 13 | "forget to activate a virtual environment?" 
14 | ) from exc 15 | execute_from_command_line(sys.argv) 16 | -------------------------------------------------------------------------------- /full_text_search/web/index_wikipedia.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import wikipedia 3 | from .models import Page 4 | 5 | logger = logging.getLogger('django') 6 | # Index `num_pages` Wikipedia articles into the Page table, one fetch-and-upsert per loop iteration. 7 | def index_wikipedia(num_pages): 8 | for _ in range(0, num_pages): 9 | p = wikipedia.random() # presumably a random article title -- confirm against the wikipedia package 10 | try: 11 | wiki_page = wikipedia.page(p) 12 | Page.objects.update_or_create(title=wiki_page.title, defaults={ # match on title; create the row or overwrite its content 13 | 'content': wiki_page.content 14 | }) 15 | logger.info('Successfully indexed %s', wiki_page) 16 | except Exception: # best-effort: log the failure with its traceback and continue with the next page 17 | logger.exception('Failed to index %s', p) 18 | -------------------------------------------------------------------------------- /full_text_search/web/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.1 on 2019-05-25 06:25 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | initial = True 9 | 10 | dependencies = [ 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name='Page', 16 | fields=[ 17 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 18 | ('title', models.CharField(max_length=100, unique=True)), 19 | ('content', models.TextField()), 20 | ], 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /full_text_search/web/migrations/0002_auto_20190525_0647.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.1 on 2019-05-25 06:47 2 | 3 | import django.contrib.postgres.indexes 4 | import django.contrib.postgres.search 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('web',
'0001_initial'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='page', 17 | name='content_search', 18 | field=django.contrib.postgres.search.SearchVectorField(null=True), 19 | ), 20 | migrations.AddIndex( 21 | model_name='page', 22 | index=django.contrib.postgres.indexes.GinIndex(fields=['content_search'], name='web_page_content_505071_gin'), 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /full_text_search/full_text_search/urls.py: -------------------------------------------------------------------------------- 1 | """full_text_search URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/2.1/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import path 18 | 19 | urlpatterns = [ 20 | path('admin/', admin.site.urls), 21 | ] 22 | -------------------------------------------------------------------------------- /full_text_search/web/migrations/0003_create_text_search_trigger.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations 2 | 3 | class Migration(migrations.Migration): 4 | 5 | dependencies = [ 6 | # NOTE: The previous migration probably looks different for you, so 7 | # modify this. 
8 | ('web', '0002_auto_20190525_0647'), 9 | ] 10 | 11 | migration = ''' 12 | CREATE TRIGGER content_search_update BEFORE INSERT OR UPDATE 13 | ON web_page FOR EACH ROW EXECUTE FUNCTION 14 | tsvector_update_trigger(content_search, 'pg_catalog.english', content); 15 | 16 | -- Force triggers to run and populate the text_search column. 17 | UPDATE web_page set ID = ID; 18 | ''' 19 | 20 | reverse_migration = ''' 21 | DROP TRIGGER content_search ON web_page; 22 | ''' 23 | 24 | operations = [ 25 | migrations.RunSQL(migration, reverse_migration) 26 | ] 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | # Edit at https://www.gitignore.io/?templates=python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 94 | # install all needed dependencies. 
95 | #Pipfile.lock 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | 130 | # End of https://www.gitignore.io/api/python 131 | -------------------------------------------------------------------------------- /full_text_search/full_text_search/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for full_text_search project. 3 | 4 | Generated by 'django-admin startproject' using Django 2.1.7. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.1/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/2.1/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = '^h@ho3*i-9=-ajr1yx-$-s#xo$@w=y!j!9q^f!h+2ezif0tcwt' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 
26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = [] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | 'django.contrib.postgres', 41 | 'django_extensions', 42 | 'web', 43 | ] 44 | 45 | MIDDLEWARE = [ 46 | 'django.middleware.security.SecurityMiddleware', 47 | 'django.contrib.sessions.middleware.SessionMiddleware', 48 | 'django.middleware.common.CommonMiddleware', 49 | 'django.middleware.csrf.CsrfViewMiddleware', 50 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 51 | 'django.contrib.messages.middleware.MessageMiddleware', 52 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 53 | ] 54 | 55 | ROOT_URLCONF = 'full_text_search.urls' 56 | 57 | TEMPLATES = [ 58 | { 59 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 60 | 'DIRS': [], 61 | 'APP_DIRS': True, 62 | 'OPTIONS': { 63 | 'context_processors': [ 64 | 'django.template.context_processors.debug', 65 | 'django.template.context_processors.request', 66 | 'django.contrib.auth.context_processors.auth', 67 | 'django.contrib.messages.context_processors.messages', 68 | ], 69 | }, 70 | }, 71 | ] 72 | 73 | WSGI_APPLICATION = 'full_text_search.wsgi.application' 74 | 75 | 76 | # Database 77 | # https://docs.djangoproject.com/en/2.1/ref/settings/#databases 78 | 79 | DATABASES = { 80 | "default": { 81 | "ENGINE": "django.db.backends.postgresql", 82 | "NAME": "my_db", 83 | "USER": "me", 84 | "PASSWORD": "password", 85 | "HOST": "localhost", 86 | "PORT": "5432", 87 | "OPTIONS": {"connect_timeout": 2}, 88 | } 89 | } 90 | 91 | 92 | # Password validation 93 | # https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators 94 | 95 | AUTH_PASSWORD_VALIDATORS = [ 96 | { 97 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 98 | }, 99 | { 100 | 
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 101 | }, 102 | { 103 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 104 | }, 105 | { 106 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 107 | }, 108 | ] 109 | 110 | 111 | # Internationalization 112 | # https://docs.djangoproject.com/en/2.1/topics/i18n/ 113 | 114 | LANGUAGE_CODE = 'en-us' 115 | 116 | TIME_ZONE = 'UTC' 117 | 118 | USE_I18N = True 119 | 120 | USE_L10N = True 121 | 122 | USE_TZ = True 123 | 124 | 125 | # Static files (CSS, JavaScript, Images) 126 | # https://docs.djangoproject.com/en/2.1/howto/static-files/ 127 | 128 | STATIC_URL = '/static/' 129 | --------------------------------------------------------------------------------