├── .env ├── .gitattributes ├── .gitignore ├── LICENSE.md ├── README.md ├── _config.yml ├── celerybeat-schedule.db ├── django_kafka ├── __init__.py ├── asgi.py ├── celery.py ├── settings.py ├── urls.py └── wsgi.py ├── manage.py └── publish_subscribe ├── __init__.py ├── admin.py ├── apps.py ├── constants.py ├── consumer.py ├── models.py ├── producers.py ├── tasks.py ├── tests.py ├── transformers.py ├── urls.py └── views.py /.env: -------------------------------------------------------------------------------- 1 | DEBUG=True 2 | DATABASE_NAME = 'django-kafka' 3 | DATABASE_USER = '' 4 | DATABASE_PASSWORD = '' 5 | HOST_ENDPOINT = '127.0.0.1' 6 | SCHEMA_REGISTRY_URL = 'http://localhost:8081' 7 | SECRET_KEY = 'gc97l0(yyw(^)bwezc)#^3ir5+!@z1%x18&fs9r%e-q^+ziq0e' -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.css linguist-detectable=false 2 | *.html linguist-detectable=false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | migrations/ 3 | __pycache__ 4 | db.sqlite3 5 | .idea 6 | celerybeat-schedule.db -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) django-kafka 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice 
shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Django Kafka 🚁 2 | Django + Kafka + Celery 🚀 3 | 4 | #### Confluent Installation (Local set-up) 5 | - `pip install confluent-kafka` 6 | - Download confluent platform from: https://www.confluent.io/download/ and unzip the file. 7 | - `export CONFLUENT_HOME=<path-to-confluent>` 8 | - `export PATH=$PATH:$CONFLUENT_HOME/bin` 9 | - `$CONFLUENT_HOME/bin/confluent-hub install --no-prompt confluentinc/kafka-connect-datagen:latest` 10 | 11 | The output would be: 12 | ``` 13 | Running in a "--no-prompt" mode 14 | ... 15 | Completed 16 | ``` 17 | 18 | - Start the service: `confluent local start` 19 | The output should resemble: 20 | ``` 21 | Starting Zookeeper 22 | Zookeeper is [UP] 23 | Starting Kafka 24 | Kafka is [UP] 25 | Starting Schema Registry 26 | Schema Registry is [UP] 27 | Starting Kafka REST 28 | Kafka REST is [UP] 29 | Starting Connect 30 | Connect is [UP] 31 | Starting KSQL Server 32 | KSQL Server is [UP] 33 | Starting Control Center 34 | Control Center is [UP] 35 | ``` 36 | - Stop services: `confluent local stop` 37 | 38 | Or set up Kafka and Zookeeper separately 39 | #### Kafka 40 | - Simply put, Kafka is a distributed publish-subscribe messaging system that maintains feeds of messages in partitioned and replicated topics. 
41 | - In the simplest way there are three players in the Kafka ecosystem: producers, topics (run by brokers) and consumers. 42 | 43 | #### Installation Zookeeper (MacOS) 44 | - `brew install zookeeper` 45 | - Installation directory of Zookeeper: `/usr/local/Cellar/zookeeper` 46 | 47 | #### Start Zookeeper 48 | - In foreground `zkServer start` 49 | - In background `brew services start zookeeper` 50 | 51 | #### Installation Kafka (MacOS) 52 | - `brew install kafka` 53 | - Installation directory of Kafka: `/usr/local/Cellar/kafka` 54 | 55 | #### Start Kafka 56 | - In background `brew services start kafka` 57 | - In foreground `zookeeper-server-start /usr/local/etc/kafka/zookeeper.properties & kafka-server-start /usr/local/etc/kafka/server.properties` 58 | 59 | For details on how to set up a Django project with best practices: https://pyblog.xyz/django-initial-setup/ 60 | 61 | #### Celery Installation - For polling 62 | - `pip install amqp` 63 | - `pip install celery==4.4.0` (Note: Celery 5.0 is not compatible with django-celery-beat) 64 | - `pip install django-celery-beat` 65 | 66 | - Start worker and beat `celery -A django_kafka beat -l INFO -S django` 67 | - Start RabbitMQ `brew services start rabbitmq` 68 | 69 | #### Update settings.py 70 | ``` 71 | INSTALLED_APPS = [ 72 | ..., 73 | 'django_celery_beat', 74 | ] 75 | ``` 76 | - Run migrations: `python manage.py migrate django_celery_beat` 77 | 78 | Note: In this project, the same Django project is the Producer and Consumer, but you can choose to have a standalone consumer. 
79 | 80 | #### Have a look at: 81 | - https://docs.confluent.io/current/getting-started.html 82 | - https://www.instaclustr.com/apache-kafka-architecture/ 83 | - https://github.com/confluentinc/confluent-kafka-python 84 | - https://docs.confluent.io/current/schema-registry/index.html 85 | - https://docs.confluent.io/current/quickstart/ce-quickstart.html 86 | 87 | ###### Note: The project is an example for Django application as a producer/consumer 😋 88 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /celerybeat-schedule.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/addu390/django-kafka/4b039d374f89ced84cec1836c7f535b0a769a9b8/celerybeat-schedule.db -------------------------------------------------------------------------------- /django_kafka/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals 2 | from .celery import app as celery_app 3 | __all__ = ('celery_app',) -------------------------------------------------------------------------------- /django_kafka/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for django_kafka project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'django_kafka.settings') 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /django_kafka/celery.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals 2 | import os 3 | from celery import Celery 4 | 5 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'django_kafka.settings') 6 | 7 | app = Celery('django_kafka') 8 | 9 | # app.config_from_object('django.conf:settings') 10 | 11 | app.autodiscover_tasks() 12 | -------------------------------------------------------------------------------- /django_kafka/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | from decouple import config 3 | 4 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 5 | 6 | SECRET_KEY = config('SECRET_KEY') 7 | 8 | DEBUG = config('DEBUG', default=False, cast=bool) 9 | 10 | INSTALLED_APPS = [ 11 | 'django.contrib.admin', 12 | 'django.contrib.auth', 13 | 'django.contrib.contenttypes', 14 | 'django.contrib.sessions', 15 | 'django.contrib.messages', 16 | 'django.contrib.staticfiles', 17 | 'django_celery_beat', 18 | 'publish_subscribe' 19 | ] 20 | ALLOWED_HOSTS = ['*'] 21 | 22 | CELERY_BROKER_URL = 'amqp://127.0.0.1:5672' 23 | SCHEMA_REGISTRY_URL = config('SCHEMA_REGISTRY_URL') 24 | 25 | MIDDLEWARE = [ 26 | 'django.middleware.security.SecurityMiddleware', 27 | 'django.contrib.sessions.middleware.SessionMiddleware', 28 | 'django.middleware.common.CommonMiddleware', 29 | 'django.middleware.csrf.CsrfViewMiddleware', 30 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 31 | 
'django.contrib.messages.middleware.MessageMiddleware', 32 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 33 | ] 34 | 35 | ROOT_URLCONF = 'django_kafka.urls' 36 | 37 | TEMPLATES = [ 38 | { 39 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 40 | 'DIRS': [], 41 | 'APP_DIRS': True, 42 | 'OPTIONS': { 43 | 'context_processors': [ 44 | 'django.template.context_processors.debug', 45 | 'django.template.context_processors.request', 46 | 'django.contrib.auth.context_processors.auth', 47 | 'django.contrib.messages.context_processors.messages', 48 | ], 49 | }, 50 | }, 51 | ] 52 | 53 | WSGI_APPLICATION = 'django_kafka.wsgi.application' 54 | 55 | DATABASES = { 56 | 'default': { 57 | 'ENGINE': 'django.db.backends.mysql', 58 | 'NAME': config('DATABASE_NAME'), 59 | 'USER': config('DATABASE_USER'), 60 | 'PASSWORD': config('DATABASE_PASSWORD'), 61 | 'HOST': config('HOST_ENDPOINT'), 62 | 'PORT': '3306', 63 | } 64 | } 65 | 66 | AUTH_PASSWORD_VALIDATORS = [ 67 | { 68 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 69 | }, 70 | { 71 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 72 | }, 73 | { 74 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 75 | }, 76 | { 77 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 78 | }, 79 | ] 80 | 81 | LANGUAGE_CODE = 'en-us' 82 | 83 | TIME_ZONE = 'UTC' 84 | 85 | USE_I18N = True 86 | 87 | USE_L10N = True 88 | 89 | USE_TZ = True 90 | 91 | STATIC_URL = '/static/' 92 | -------------------------------------------------------------------------------- /django_kafka/urls.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.urls import path, include 3 | 4 | urlpatterns = [ 5 | path('admin/', admin.site.urls), 6 | path('', include('publish_subscribe.urls')), 7 | ] 
-------------------------------------------------------------------------------- /django_kafka/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for django_kafka project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'django_kafka.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def main(): 6 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'django_kafka.settings') 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError as exc: 10 | raise ImportError( 11 | "Couldn't import Django. Are you sure it's installed and " 12 | "available on your PYTHONPATH environment variable? Did you " 13 | "forget to activate a virtual environment?" 14 | ) from exc 15 | execute_from_command_line(sys.argv) 16 | 17 | 18 | if __name__ == '__main__': 19 | main() 20 | -------------------------------------------------------------------------------- /publish_subscribe/__init__.py: -------------------------------------------------------------------------------- 1 | # default_app_config = 'publish_subscribe.apps.PublishSubscribeConfig' 2 | -------------------------------------------------------------------------------- /publish_subscribe/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 
4 | -------------------------------------------------------------------------------- /publish_subscribe/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class PublishSubscribeConfig(AppConfig): 5 | name = 'publish_subscribe' 6 | 7 | -------------------------------------------------------------------------------- /publish_subscribe/constants.py: -------------------------------------------------------------------------------- 1 | USER_SCHEMA = """ 2 | { 3 | "$schema": "http://json-schema.org/draft-07/schema#", 4 | "title": "User", 5 | "description": "A Confluent Kafka Python User", 6 | "type": "object", 7 | "properties": { 8 | "username": { 9 | "description": "User's name", 10 | "type": "string" 11 | }, 12 | "data": { 13 | "description": "User's favorite color", 14 | "type": "string" 15 | } 16 | }, 17 | "required": [ "username", "data" ] 18 | } 19 | """ 20 | 21 | USER_TOPIC = 'leon' 22 | -------------------------------------------------------------------------------- /publish_subscribe/consumer.py: -------------------------------------------------------------------------------- 1 | from confluent_kafka import DeserializingConsumer 2 | from confluent_kafka.schema_registry.json_schema import JSONDeserializer 3 | from confluent_kafka.serialization import StringDeserializer 4 | from .constants import USER_SCHEMA, USER_TOPIC 5 | from .transformers import dict_to_user 6 | import logging 7 | import traceback 8 | 9 | 10 | def receive(): 11 | json_deserializer = JSONDeserializer(USER_SCHEMA, from_dict=dict_to_user) 12 | string_deserializer = StringDeserializer('utf_8') 13 | consumer_conf = {'bootstrap.servers': 'localhost:9092', 14 | 'key.deserializer': string_deserializer, 15 | 'value.deserializer': json_deserializer, 16 | 'group.id': 'django-kafka', 17 | 'auto.offset.reset': "earliest"} 18 | 19 | consumer = DeserializingConsumer(consumer_conf) 20 | consumer.subscribe([USER_TOPIC]) 21 
| 22 | """ 23 | The idea is to start the Kafka consumer when the message is sent to the Kafka producer. 24 | Resulting in two queues: Task Queue and Message/Content Queue. 25 | Multi-threading might be an overkill for a simple application, hence the for loop (Temporary). 26 | """ 27 | for x in range(200): 28 | try: 29 | msg = consumer.poll(timeout=5.0) 30 | if msg is not None: 31 | user = msg.value() 32 | if user is not None: 33 | print("User record {}: username: {}\n" 34 | "\tdata: {}\n" 35 | .format(msg.key(), user.username, 36 | user.data)) 37 | 38 | except Exception as e: 39 | print('An exception occurred: {}'.format(e)) 40 | logging.error(traceback.format_exc()) 41 | 42 | -------------------------------------------------------------------------------- /publish_subscribe/models.py: -------------------------------------------------------------------------------- 1 | class UserConsumer(object): 2 | 3 | def __init__(self, username=None, data=None): 4 | self.username = username 5 | self.data = data 6 | 7 | class Meta: 8 | managed = False 9 | 10 | 11 | class UserProducer(object): 12 | 13 | def __init__(self, username, token, data): 14 | self.username = username 15 | self.data = data 16 | self._token = token 17 | 18 | class Meta: 19 | managed = False 20 | -------------------------------------------------------------------------------- /publish_subscribe/producers.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | from confluent_kafka import SerializingProducer 3 | from confluent_kafka.serialization import StringSerializer 4 | from confluent_kafka.schema_registry import SchemaRegistryClient 5 | from confluent_kafka.schema_registry.json_schema import JSONSerializer 6 | 7 | from django_kafka.settings import SCHEMA_REGISTRY_URL 8 | from .constants import USER_SCHEMA, USER_TOPIC 9 | from .transformers import user_to_dict 10 | from .models import UserProducer 11 | 12 | schema_registry_conf = {'url': 
SCHEMA_REGISTRY_URL} 13 | schema_registry_client = SchemaRegistryClient(schema_registry_conf) 14 | 15 | 16 | def delivery_report(err, msg): 17 | if err is not None: 18 | print("Delivery failed for User record {}: {}".format(msg.key(), err)) 19 | return 20 | print('User record {} successfully produced to {} [{}] at offset {}'.format( 21 | msg.key(), msg.topic(), msg.partition(), msg.offset())) 22 | 23 | 24 | json_serializer = JSONSerializer(USER_SCHEMA, schema_registry_client, user_to_dict) 25 | producer_conf = {'bootstrap.servers': 'localhost:9092', 26 | 'key.serializer': StringSerializer('utf_8'), 27 | 'value.serializer': json_serializer} 28 | producer = SerializingProducer(producer_conf) 29 | 30 | 31 | def send(username, data, token): 32 | user = UserProducer(username=username, 33 | data=data, 34 | token=token) 35 | 36 | producer.produce(topic=USER_TOPIC, key=str(uuid4()), value=user, 37 | on_delivery=delivery_report) 38 | -------------------------------------------------------------------------------- /publish_subscribe/tasks.py: -------------------------------------------------------------------------------- 1 | from celery import shared_task 2 | from .consumer import receive 3 | 4 | 5 | @shared_task 6 | def send_summary(): 7 | receive() 8 | -------------------------------------------------------------------------------- /publish_subscribe/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 
4 | -------------------------------------------------------------------------------- /publish_subscribe/transformers.py: -------------------------------------------------------------------------------- 1 | from .models import UserConsumer 2 | 3 | 4 | def dict_to_user(obj, ctx): 5 | if obj is None: 6 | return None 7 | 8 | return UserConsumer(username=obj['username'], data=obj['data']) 9 | 10 | 11 | def user_to_dict(user, ctx): 12 | return dict(username=user.username, 13 | data=user.data) 14 | -------------------------------------------------------------------------------- /publish_subscribe/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import path 2 | from .views import SendMessage, ReceiveMessage 3 | 4 | urlpatterns = [ 5 | path('send', SendMessage.as_view(), name='send_message'), 6 | # For testing Producer and Consumer in the same project 7 | path('receive', ReceiveMessage.as_view(), name='receive_message') 8 | ] 9 | -------------------------------------------------------------------------------- /publish_subscribe/views.py: -------------------------------------------------------------------------------- 1 | from rest_framework.views import APIView 2 | from .producers import send 3 | from rest_framework.response import Response 4 | from rest_framework import status 5 | from .consumer import receive 6 | 7 | 8 | class SendMessage(APIView): 9 | 10 | def post(self, request): 11 | username = request.data.get("username") 12 | token = request.data.get("token") 13 | data = request.data.get("data") 14 | 15 | send(username, data, token) 16 | 17 | return Response({"success": True}, status=status.HTTP_201_CREATED) 18 | 19 | 20 | class ReceiveMessage(APIView): 21 | 22 | def post(self, request): 23 | receive() 24 | 25 | return Response({"success": True}, status=status.HTTP_201_CREATED) 26 | --------------------------------------------------------------------------------