├── full_text_search
├── web
│ ├── __init__.py
│ ├── migrations
│ │ ├── __init__.py
│ │ ├── 0001_initial.py
│ │ ├── 0002_auto_20190525_0647.py
│ │ └── 0003_create_text_search_trigger.py
│ ├── tests.py
│ ├── admin.py
│ ├── views.py
│ ├── apps.py
│ ├── models.py
│ └── index_wikipedia.py
├── full_text_search
│ ├── __init__.py
│ ├── wsgi.py
│ ├── urls.py
│ └── settings.py
├── docker-compose.yml
└── manage.py
├── README.md
└── .gitignore
/full_text_search/web/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/full_text_search/web/migrations/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/full_text_search/full_text_search/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/full_text_search/web/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 |
3 | # Create your tests here.
4 |
--------------------------------------------------------------------------------
/full_text_search/web/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | # Register your models here.
4 |
--------------------------------------------------------------------------------
/full_text_search/web/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 |
3 | # Create your views here.
4 |
--------------------------------------------------------------------------------
/full_text_search/web/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 |
3 |
class WebConfig(AppConfig):
    """Django application configuration for the ``web`` app."""

    # Name of the application this configuration belongs to.
    name = "web"
6 |
--------------------------------------------------------------------------------
/full_text_search/docker-compose.yml:
--------------------------------------------------------------------------------
---
version: '2.4'
services:
  postgres:
    image: postgres:11-alpine
    ports:
      - '5432:5432'
    environment:
      # Set the Postgres environment variables for bootstrapping the default
      # database and user. These values mirror the DATABASES entry in
      # full_text_search/settings.py, so change both together.
      POSTGRES_DB: "my_db"
      POSTGRES_USER: "me"
      POSTGRES_PASSWORD: "password"
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # Full text search with Django and Postgres
6 |
7 | This repository contains example code for configuring and optimizing Django full
8 | text search with Postgres.
9 |
10 | A detailed how-to guide can be found [here](https://findwork.dev/blog/optimizing-postgres-full-text-search-in-django/).
11 |
--------------------------------------------------------------------------------
/full_text_search/web/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | from django.contrib.postgres.search import SearchVectorField
3 | from django.contrib.postgres.indexes import GinIndex
4 |
class Page(models.Model):
    """A titled page of text that can be searched through ``content_search``."""

    title = models.CharField(max_length=100, unique=True)
    content = models.TextField()

    # Search vector column used for full text search; nullable, and covered
    # by the GIN index declared in ``Meta`` below.
    content_search = SearchVectorField(null=True)

    class Meta:
        indexes = [
            GinIndex(fields=["content_search"]),
        ]
14 |
--------------------------------------------------------------------------------
/full_text_search/full_text_search/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for full_text_search project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
# Use this project's settings module unless the environment already names one.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "full_text_search.settings")

# Module-level WSGI callable, as described in the module docstring.
application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/full_text_search/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
if __name__ == "__main__":
    # Use the project's settings unless the caller already chose a module.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "full_text_search.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as import_error:
        # Re-raise with a hint about the usual causes, keeping the original
        # error chained for debugging.
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_error
    execute_from_command_line(sys.argv)
16 |
--------------------------------------------------------------------------------
/full_text_search/web/index_wikipedia.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import wikipedia
3 | from .models import Page
4 |
5 | logger = logging.getLogger('django')
6 |
def index_wikipedia(num_pages):
    """Fetch random Wikipedia articles and store them as ``Page`` rows.

    Makes ``num_pages`` attempts. Each attempt picks a random article title,
    loads the article and creates or updates the ``Page`` with that title.
    Indexing is best-effort: a failed attempt is logged and skipped, so fewer
    than ``num_pages`` rows may end up being written.

    Args:
        num_pages: Number of random articles to attempt to index.
    """
    for _ in range(num_pages):
        # Picking the title can fail just like loading the article, so it
        # gets the same log-and-continue treatment instead of aborting the
        # whole run.
        try:
            title = wikipedia.random()
        except Exception:
            logger.exception('Failed to pick a random Wikipedia page')
            continue

        try:
            wiki_page = wikipedia.page(title)
            Page.objects.update_or_create(
                title=wiki_page.title,
                defaults={'content': wiki_page.content},
            )
        except Exception:
            logger.exception('Failed to index %s', title)
        else:
            logger.info('Successfully indexed %s', wiki_page)
18 |
--------------------------------------------------------------------------------
/full_text_search/web/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 2.2.1 on 2019-05-25 06:25
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Initial migration for the ``web`` app: creates the ``Page`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Page",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("title", models.CharField(max_length=100, unique=True)),
                ("content", models.TextField()),
            ],
        ),
    ]
23 |
--------------------------------------------------------------------------------
/full_text_search/web/migrations/0002_auto_20190525_0647.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 2.2.1 on 2019-05-25 06:47
2 |
3 | import django.contrib.postgres.indexes
4 | import django.contrib.postgres.search
5 | from django.db import migrations
6 |
7 |
class Migration(migrations.Migration):
    """Add the ``content_search`` vector field to ``Page`` plus its GIN index."""

    dependencies = [("web", "0001_initial")]

    operations = [
        migrations.AddField(
            model_name="page",
            name="content_search",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddIndex(
            model_name="page",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["content_search"],
                name="web_page_content_505071_gin",
            ),
        ),
    ]
25 |
--------------------------------------------------------------------------------
/full_text_search/full_text_search/urls.py:
--------------------------------------------------------------------------------
1 | """full_text_search URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/2.1/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: path('', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.urls import include, path
14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
15 | """
16 | from django.contrib import admin
17 | from django.urls import path
18 |
# The only route in this project is the Django admin site.
urlpatterns = [path("admin/", admin.site.urls)]
22 |
--------------------------------------------------------------------------------
/full_text_search/web/migrations/0003_create_text_search_trigger.py:
--------------------------------------------------------------------------------
1 | from django.db import migrations
2 |
class Migration(migrations.Migration):
    """Keep ``web_page.content_search`` in sync with ``content`` via a trigger.

    Applying the migration creates the ``content_search_update`` trigger and
    then touches every existing row so the trigger back-fills the column.
    Reversing the migration drops that same trigger.
    """

    dependencies = [
        # NOTE: The previous migration probably looks different for you, so
        # modify this.
        ('web', '0002_auto_20190525_0647'),
    ]

    # NOTE: ``EXECUTE FUNCTION`` needs PostgreSQL 11 or newer; older servers
    # only accept the ``EXECUTE PROCEDURE`` spelling.
    migration = '''
        CREATE TRIGGER content_search_update BEFORE INSERT OR UPDATE
        ON web_page FOR EACH ROW EXECUTE FUNCTION
        tsvector_update_trigger(content_search, 'pg_catalog.english', content);

        -- Force triggers to run and populate the content_search column.
        UPDATE web_page set ID = ID;
    '''

    # The trigger name must match the one created above; otherwise rolling
    # back fails because the named trigger does not exist.
    reverse_migration = '''
        DROP TRIGGER content_search_update ON web_page;
    '''

    operations = [
        migrations.RunSQL(migration, reverse_migration)
    ]
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.gitignore.io/api/python
3 | # Edit at https://www.gitignore.io/?templates=python
4 |
5 | ### Python ###
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | pip-wheel-metadata/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .nox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # celery beat schedule file
98 | celerybeat-schedule
99 |
100 | # SageMath parsed files
101 | *.sage.py
102 |
103 | # Environments
104 | .env
105 | .venv
106 | env/
107 | venv/
108 | ENV/
109 | env.bak/
110 | venv.bak/
111 |
112 | # Spyder project settings
113 | .spyderproject
114 | .spyproject
115 |
116 | # Rope project settings
117 | .ropeproject
118 |
119 | # mkdocs documentation
120 | /site
121 |
122 | # mypy
123 | .mypy_cache/
124 | .dmypy.json
125 | dmypy.json
126 |
127 | # Pyre type checker
128 | .pyre/
129 |
130 | # End of https://www.gitignore.io/api/python
131 |
--------------------------------------------------------------------------------
/full_text_search/full_text_search/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for full_text_search project.
3 |
4 | Generated by 'django-admin startproject' using Django 2.1.7.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/2.1/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/2.1/ref/settings/
11 | """
12 |
13 | import os
14 |
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '^h@ho3*i-9=-ajr1yx-$-s#xo$@w=y!j!9q^f!h+2ezif0tcwt'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []


# Application definition

INSTALLED_APPS = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "django.contrib.postgres",
    "django_extensions",
    "web",
]

MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]

ROOT_URLCONF = "full_text_search.urls"

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
        },
    },
]

WSGI_APPLICATION = "full_text_search.wsgi.application"


# Database
# https://docs.djangoproject.com/en/2.1/ref/settings/#databases

# Connection details match the Postgres service in docker-compose.yml.
DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.postgresql",
        "NAME": "my_db",
        "USER": "me",
        "PASSWORD": "password",
        "HOST": "localhost",
        "PORT": "5432",
        "OPTIONS": {"connect_timeout": 2},
    }
}


# Password validation
# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator"},
    {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"},
    {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"},
    {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"},
]


# Internationalization
# https://docs.djangoproject.com/en/2.1/topics/i18n/

LANGUAGE_CODE = "en-us"

TIME_ZONE = "UTC"

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/2.1/howto/static-files/

STATIC_URL = "/static/"
129 |
--------------------------------------------------------------------------------