├── nginx
├── Dockerfile
└── nginx.conf
├── env.sh
├── .gitignore
├── api
├── wsgi.py
├── extensions.py
├── run.py
├── database
│ ├── schemas
│ │ ├── user_schema.py
│ │ ├── chat_history_schema.py
│ │ ├── functions_history_schema.py
│ │ ├── resources_schema.py
│ │ ├── knowledge_base_schema.py
│ │ └── personal_memory_schema.py
│ ├── models
│ │ ├── user.py
│ │ ├── chat_history.py
│ │ ├── functions_history.py
│ │ ├── resources.py
│ │ ├── knowledge_base.py
│ │ └── personal_memory.py
│ └── db_manager.py
├── routes
│ ├── clear_context_view.py
│ ├── test_view.py
│ ├── db_conversation_id_view.py
│ ├── login_view.py
│ ├── check_english_view.py
│ ├── yt_summary_view.py
│ ├── web_page_summary_view.py
│ └── chat_view.py
└── __init__.py
├── docs
├── logo.png
├── diagram.png
├── logo.py
├── scripts.py
├── todo.md
├── jupyter_notebooks
│ ├── ollama.ipynb
│ ├── web_summary.ipynb
│ ├── yt_video_summary.ipynb
│ └── document_rag.ipynb
├── feature_ideas.md
└── notes.md
├── discord_bot
├── Dockerfile
├── .env.dist
├── bot_commands.py
├── config.py
├── utils.py
└── assistant_bot.py
├── Dockerfile-env
├── update_container.sh
├── Dockerfile
├── docker-compose.local.yml
├── .env.dist
├── alembic
├── README
├── script.py.mako
├── versions
│ ├── b6994a6fb482_add_context_column_to_functions_history.py
│ ├── 7fbcdb262a79_increase_users_password_hash_length.py
│ ├── 264699802ec3_create_users_table.py
│ ├── e3fab275bece_add_chat_history_table.py
│ ├── 0f632f48bc6d_add_functions_history_table.py
│ ├── 6409f4f8492b_add_resources_table.py
│ ├── 265ab9e632ed_add_personal_memory_table.py
│ └── e62d40842589_add_knowledge_base_table.py
└── env.py
├── docker-compose.workflows.yml
├── docker-compose.yml
├── tests
└── test_assistant_api.py
├── README.md
├── alembic.ini
├── requirements.txt
└── .github
└── workflows
└── basic.yml
/nginx/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx
2 | COPY ./nginx.conf /etc/nginx/nginx.conf
--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | docker build --no-cache -t assistant_env -f Dockerfile-env .
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | __pycache__/
3 | actions-runner/
4 | .DS_Store
5 | .vscode
6 | .history
--------------------------------------------------------------------------------
/api/wsgi.py:
--------------------------------------------------------------------------------
# WSGI entry point; gunicorn targets "api.wsgi:app" (see Dockerfile CMD).
from api.run import app

if __name__ == "__main__":
    # Fallback for running the API directly without gunicorn.
    app.run()
5 |
--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/janbanot/personal_assistant/HEAD/docs/logo.png
--------------------------------------------------------------------------------
/docs/diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/janbanot/personal_assistant/HEAD/docs/diagram.png
--------------------------------------------------------------------------------
/discord_bot/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM assistant_env
2 | WORKDIR /usr/src/app
3 | COPY .env .env
4 | COPY . .
5 | CMD ["python3", "assistant_bot.py"]
--------------------------------------------------------------------------------
/Dockerfile-env:
--------------------------------------------------------------------------------
1 | FROM python:3.11
2 | WORKDIR /usr/src/app
3 | COPY requirements.txt ./
4 | RUN pip install --no-cache-dir -r requirements.txt
--------------------------------------------------------------------------------
/discord_bot/.env.dist:
--------------------------------------------------------------------------------
1 | DISCORD_TOKEN=XYZ
2 | DISCORD_GUILD_ID=1234
3 | API_USER_EMAIL=test@email.com
4 | API_PASSWORD=xyz
5 | API_URL=localhost:8081/
--------------------------------------------------------------------------------
/update_container.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Rebuild and restart a single compose service without touching its
# dependencies. Usage: ./update_container.sh <suffix>  (e.g. "api" -> assistant_api)

service="assistant_$1"

# Quote the expansion so an empty/odd argument cannot word-split or glob.
docker-compose build "$service"
docker-compose up -d --no-deps "$service"
--------------------------------------------------------------------------------
/api/extensions.py:
--------------------------------------------------------------------------------
# Shared Flask extension singletons, kept in their own module so models and
# views can import them without a circular import on the app object.
# NOTE(review): presumably bound to the app via init_app() inside
# create_app() — confirm in api/__init__.py.
from flask_sqlalchemy import SQLAlchemy
from flask_marshmallow import Marshmallow
from flask_jwt_extended import JWTManager

db = SQLAlchemy()   # ORM handle used by the models and DBManager
ma = Marshmallow()  # serialization layer for the schemas package
jwt = JWTManager()  # backs the jwt_required() view decorators
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM assistant_env
2 | WORKDIR /usr/src/app
3 | COPY .env .env
4 | COPY api ./api
5 | COPY tests ./tests
6 | COPY alembic.ini .
7 | COPY alembic ./alembic
8 | EXPOSE 8080
9 | CMD [ "gunicorn", "--bind", "0.0.0.0:8080", "--timeout", "180", "api.wsgi:app" ]
--------------------------------------------------------------------------------
/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | events { worker_connections 1024; }
2 |
3 | http {
4 | server {
5 | listen 8081;
6 | server_name localhost;
7 |
8 | location / {
9 | proxy_pass http://assistant_api:8080;
10 | }
11 | }
12 | }
--------------------------------------------------------------------------------
/api/run.py:
--------------------------------------------------------------------------------
import os
from dotenv import load_dotenv
from api import create_app

# Load .env before the factory runs so config values are available to it.
load_dotenv()
app = create_app()

if __name__ == "__main__":
    # FLASK_DEBUG_MODE arrives as a string; compare case-insensitively.
    is_debug = os.getenv("FLASK_DEBUG_MODE", "False").lower() == "true"
    app.run(debug=is_debug)
11 |
--------------------------------------------------------------------------------
/api/database/schemas/user_schema.py:
--------------------------------------------------------------------------------
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from api.database.models.user import User


class UserSchema(SQLAlchemyAutoSchema):
    """Auto-generated serializer for ``User`` rows.

    NOTE(review): "password_hash" is in the serialized fields — confirm
    this schema is never used to build external API responses.
    """

    class Meta:
        model = User
        fields = ("id", "email", "login", "password_hash")
9 |
--------------------------------------------------------------------------------
/docker-compose.local.yml:
--------------------------------------------------------------------------------
1 | services:
2 | assistant_proxy:
3 | build:
4 | context: ./nginx
5 | args:
6 | env: local
7 | dockerfile: Dockerfile
8 | ports:
9 | - "8081:8081"
10 | depends_on:
11 | - assistant_api
12 | assistant_qdrant:
13 | ports:
14 | - "6333:6333"
--------------------------------------------------------------------------------
/.env.dist:
--------------------------------------------------------------------------------
1 | FLASK_SECRET_KEY=XYZ
2 | FLASK_DEBUG_MODE=False
3 | POSTGRES_URL=postgresql://user:pass@db:5432/dbname
4 | POSTGRES_USER=user
5 | POSTGRES_PASSWORD=XYZ
6 | POSTGRES_DB=user_db
7 | OPENAI_API_KEY=sk-Abc
8 | LANGCHAIN_API_KEY=ls__abc
9 | LANGCHAIN_TRACING_V2=true
10 | LANGCHAIN_PROJECT=project_name
11 | LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
--------------------------------------------------------------------------------
/alembic/README:
--------------------------------------------------------------------------------
1 | # Alembic Migrations
2 | Examples of using Alembic to manage database migrations:
3 | - to apply migrations to the database, run the following command:
4 | ```bash
5 | alembic upgrade head
6 | ```
7 | - to apply migration in docker-compose, run the following command:
8 | ```bash
9 | docker-compose exec assistant_api alembic upgrade head
10 | ```
--------------------------------------------------------------------------------
/api/database/schemas/chat_history_schema.py:
--------------------------------------------------------------------------------
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from api.database.models.chat_history import ChatHistory


class ChatHistorySchema(SQLAlchemyAutoSchema):
    """Serializer for ``ChatHistory`` rows."""

    class Meta:
        model = ChatHistory
        fields = ("message_id", "conversation_id", "user_message", "current_context", "answer")


# Backward-compatible alias: the class was previously misnamed "UserSchema"
# (copy-paste from user_schema.py); keep the old name importable.
UserSchema = ChatHistorySchema
9 |
--------------------------------------------------------------------------------
/api/database/models/user.py:
--------------------------------------------------------------------------------
from sqlalchemy import Column, Integer, String
from api.extensions import db


class User(db.Model):  # type: ignore
    """Account row; the password is stored only as a werkzeug hash
    (see docs/scripts.py and LoginView)."""

    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    email = Column(String, unique=True)  # login identity used by LoginView
    login = Column(String, unique=True)
    password_hash = Column(String)
11 |
--------------------------------------------------------------------------------
/api/routes/clear_context_view.py:
--------------------------------------------------------------------------------
1 | from flask import jsonify
2 | from flask.views import MethodView
3 | from flask_jwt_extended import jwt_required
4 | from api.routes.chat_view import context_memory
5 |
6 |
class ClearView(MethodView):
    """Authenticated endpoint that wipes the shared chat context."""

    decorators = [jwt_required()]

    def post(self):
        """Empty ``context_memory`` in place and confirm via JSON."""
        # context_memory is the module-level store imported from chat_view.
        context_memory.clear()
        response = jsonify({"message": "Context memory cleared"})
        return response
13 |
--------------------------------------------------------------------------------
/api/database/models/chat_history.py:
--------------------------------------------------------------------------------
from sqlalchemy import Column, Integer, String
from api.extensions import db


class ChatHistory(db.Model):  # type: ignore
    """One user message / assistant answer pair within a conversation."""

    __tablename__ = "chat_history"
    message_id = Column(Integer, primary_key=True)
    conversation_id = Column(Integer)  # groups messages; newest id reused by DBManager
    user_message = Column(String)
    current_context = Column(String)   # context stored with the message (see DBManager.save_message)
    answer = Column(String)
12 |
--------------------------------------------------------------------------------
/api/routes/test_view.py:
--------------------------------------------------------------------------------
1 | from flask import jsonify, request, current_app
2 | from flask.views import MethodView
3 | from flask_jwt_extended import jwt_required
4 |
5 |
class TestView(MethodView):
    """Authenticated smoke-test endpoint returning a fixed payload."""

    decorators = [jwt_required()]

    def get(self):
        """Log the request and respond with a constant greeting."""
        payload = "hello world"
        logger = current_app.logger
        logger.info("Request: %s", request)
        logger.info("Response: %s", payload)
        return jsonify({"data": payload})
14 |
--------------------------------------------------------------------------------
/docs/logo.py:
--------------------------------------------------------------------------------
# One-off helper: generate the project logo with DALL-E 3 and print its URL.
import os
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_key)

response = client.images.generate(
    model="dall-e-3",
    prompt="a neon emblem logo of a robot personal assistant, simple, vector, color",
    size="1024x1024",
    quality="standard",
    n=1,  # a single image, so response.data has exactly one entry
)

# The API responds with a hosted URL rather than image bytes.
image_url = response.data[0].url
print(image_url)
19 |
--------------------------------------------------------------------------------
/docker-compose.workflows.yml:
--------------------------------------------------------------------------------
1 | services:
2 | assistant_api:
3 | labels:
4 | - "workflow=${GITHUB_RUN_ID}"
5 | assistant_proxy:
6 | labels:
7 | - "workflow=${GITHUB_RUN_ID}"
8 | assistant_db:
9 | labels:
10 | - "workflow=${GITHUB_RUN_ID}"
11 | assistant_qdrant:
12 | labels:
13 | - "workflow=${GITHUB_RUN_ID}"
14 | assistant_bot:
15 | labels:
16 | - "workflow=${GITHUB_RUN_ID}"
17 | assistant_test:
18 | labels:
19 | - "workflow=${GITHUB_RUN_ID}"
--------------------------------------------------------------------------------
/api/database/schemas/functions_history_schema.py:
--------------------------------------------------------------------------------
1 | from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
2 | from api.database.models.functions_history import FunctionsHistory
3 |
4 |
class FunctionsHistorySchema(SQLAlchemyAutoSchema):
    """Serializer for ``FunctionsHistory`` rows."""

    class Meta:
        model = FunctionsHistory
        fields = (
            "id",
            "interaction_id",
            "function",
            "user_input",
            "answer",
            "created_at",
            "context"
        )
17 |
--------------------------------------------------------------------------------
/api/database/schemas/resources_schema.py:
--------------------------------------------------------------------------------
1 | from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
2 | from api.database.models.resources import Resource
3 |
4 |
class ResourceSchema(SQLAlchemyAutoSchema):
    """Serializer for ``Resource`` rows."""

    class Meta:
        model = Resource
        fields = (
            "id",
            "name",
            "content",
            "url",
            "tags",
            "category",
            "active",
            "created_at",
            "updated_at",
        )
19 |
--------------------------------------------------------------------------------
/api/routes/db_conversation_id_view.py:
--------------------------------------------------------------------------------
1 | from flask import jsonify
2 | from flask.views import MethodView
3 | from flask_jwt_extended import jwt_required
4 | from api.database.db_manager import DBManager
5 |
6 |
class DBConversationIdView(MethodView):
    """Authenticated endpoint returning the most recent conversation id."""

    decorators = [jwt_required()]

    def __init__(self):
        # DBManager keeps no state of its own; one per view instance is fine.
        self.db_manager = DBManager()

    def get(self):
        """Respond with the id of the newest conversation in chat history."""
        current_id = self.db_manager.get_current_conversation_id()
        return jsonify({"conversation_id": current_id})
16 |
--------------------------------------------------------------------------------
/api/database/schemas/knowledge_base_schema.py:
--------------------------------------------------------------------------------
1 | from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
2 | from api.database.models.knowledge_base import KnowledgeBase
3 |
4 |
class KnowledgeBaseSchema(SQLAlchemyAutoSchema):
    """Serializer for ``KnowledgeBase`` rows."""

    class Meta:
        model = KnowledgeBase
        fields = (
            "id",
            "category",
            "tag",
            "content",
            "source",
            "created_at",
            "updated_at",
            "last_accessed_at",
            "active",
        )
19 |
--------------------------------------------------------------------------------
/api/database/schemas/personal_memory_schema.py:
--------------------------------------------------------------------------------
1 | from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
2 | from api.database.models.personal_memory import PersonalMemory
3 |
4 |
class PersonalMemorySchema(SQLAlchemyAutoSchema):
    """Serializer for ``PersonalMemory`` rows."""

    class Meta:
        model = PersonalMemory
        fields = (
            "id",
            "name",
            "description",
            "source",
            "category",
            "tags",
            "created_at",
            "updated_at",
            "active",
        )
19 |
--------------------------------------------------------------------------------
/api/database/models/functions_history.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, Integer, String, Text, DateTime
2 | from api.extensions import db
3 | import sqlalchemy as sa
4 |
5 |
class FunctionsHistory(db.Model):  # type: ignore
    """History of executed functions and the answers they produced."""

    __tablename__ = "functions_history"
    id = Column(Integer, primary_key=True)
    interaction_id = Column(Integer, nullable=False)  # presumably groups related calls — TODO confirm
    function = Column(String(255), nullable=False)    # name of the invoked function
    user_input = Column(Text, nullable=False)
    answer = Column(Text, nullable=False)
    created_at = Column(DateTime, nullable=False, server_default=sa.func.now())
    context = Column(Text, nullable=True)  # optional; added by migration b6994a6fb482
15 |
--------------------------------------------------------------------------------
/alembic/script.py.mako:
--------------------------------------------------------------------------------
1 | """${message}
2 |
3 | Revision ID: ${up_revision}
4 | Revises: ${down_revision | comma,n}
5 | Create Date: ${create_date}
6 |
7 | """
8 | from typing import Sequence, Union
9 |
10 | from alembic import op
11 | import sqlalchemy as sa
12 | ${imports if imports else ""}
13 |
14 | # revision identifiers, used by Alembic.
15 | revision: str = ${repr(up_revision)}
16 | down_revision: Union[str, None] = ${repr(down_revision)}
17 | branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18 | depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19 |
20 |
21 | def upgrade() -> None:
22 | ${upgrades if upgrades else "pass"}
23 |
24 |
25 | def downgrade() -> None:
26 | ${downgrades if downgrades else "pass"}
27 |
--------------------------------------------------------------------------------
/alembic/versions/b6994a6fb482_add_context_column_to_functions_history.py:
--------------------------------------------------------------------------------
"""add context column to functions history

Revision ID: b6994a6fb482
Revises: 6409f4f8492b
Create Date: 2024-06-22 09:34:02.802263

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = 'b6994a6fb482'
down_revision: Union[str, None] = '6409f4f8492b'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # Nullable so existing functions_history rows need no backfill.
    op.add_column('functions_history', sa.Column('context', sa.Text(), nullable=True))


def downgrade() -> None:
    op.drop_column('functions_history', 'context')
27 |
--------------------------------------------------------------------------------
/api/database/models/resources.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean
2 | from api.extensions import db
3 | import sqlalchemy as sa
4 |
5 |
class Resource(db.Model):  # type: ignore
    """Stored resource entry with a name, optional content/url, and tags."""

    __tablename__ = "resources"
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)
    content = Column(Text, nullable=True)
    url = Column(String(255), nullable=True)
    tags = Column(String(255), nullable=True)  # presumably a delimited list — TODO confirm format
    category = Column(String(255), nullable=False)
    active = Column(Boolean, nullable=False, server_default="true")
    created_at = Column(DateTime, nullable=False, server_default=sa.func.now())
    # onupdate is applied by SQLAlchemy-issued UPDATEs, not a DB trigger.
    updated_at = Column(
        DateTime, nullable=False, server_default=sa.func.now(), onupdate=sa.func.now()
    )
19 |
--------------------------------------------------------------------------------
/api/database/models/knowledge_base.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean
2 | from api.extensions import db
3 | import sqlalchemy as sa
4 |
5 |
class KnowledgeBase(db.Model):  # type: ignore
    """Knowledge-base entry: categorized, tagged text with optional source."""

    __tablename__ = "knowledge_base"
    id = Column(Integer, primary_key=True)
    category = Column(String(255), nullable=False)
    tag = Column(String(255), nullable=False)
    content = Column(Text, nullable=False)
    source = Column(String(255), nullable=True)
    created_at = Column(DateTime, nullable=False, server_default=sa.func.now())
    # onupdate is applied by SQLAlchemy-issued UPDATEs, not a DB trigger.
    updated_at = Column(
        DateTime, nullable=False, server_default=sa.func.now(), onupdate=sa.func.now()
    )
    last_accessed_at = Column(DateTime, nullable=True)  # not set automatically; caller updates it
    active = Column(Boolean, nullable=False, server_default="true")
19 |
--------------------------------------------------------------------------------
/api/database/models/personal_memory.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean
2 | from api.extensions import db
3 | import sqlalchemy as sa
4 |
5 |
class PersonalMemory(db.Model):  # type: ignore
    """Personal-memory entry: a named, categorized note with optional tags."""

    __tablename__ = "personal_memory"
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)
    source = Column(String(255), nullable=True)
    category = Column(String(255), nullable=False)
    tags = Column(String(255), nullable=True)  # presumably a delimited list — TODO confirm format
    created_at = Column(DateTime, nullable=False, server_default=sa.func.now())
    # onupdate is applied by SQLAlchemy-issued UPDATEs, not a DB trigger.
    updated_at = Column(
        DateTime, nullable=False, server_default=sa.func.now(), onupdate=sa.func.now()
    )
    active = Column(Boolean, nullable=False, server_default="true")
19 |
--------------------------------------------------------------------------------
/discord_bot/bot_commands.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from collections import namedtuple
3 |
Command = namedtuple("Command", ["name", "description"])


class BotCommands(Enum):
    """Registry of the bot's slash commands and their help text."""

    LIST_COMMANDS = Command("list-commands", "list all available commands")
    YT_SUMMARY = Command(
        "yt-summary", "get a summary of a YouTube video, provide a URL"
    )
    PAGE_SUMMARY = Command(
        "page-summary", "get a summary of a page, provide a URL"
    )
    CHECK_ENGLISH = Command(
        "check-english",
        "check and fix grammatical, spelling, and punctuation errors in English text",
    )


def get_bot_commands():
    """Return a newline-joined, Markdown-formatted list of all commands."""
    return "\n".join(
        f"- **{cmd.value.name}**: {cmd.value.description}" for cmd in BotCommands
    )
26 |
--------------------------------------------------------------------------------
/alembic/versions/7fbcdb262a79_increase_users_password_hash_length.py:
--------------------------------------------------------------------------------
"""increase users.password_hash length

Revision ID: 7fbcdb262a79
Revises: 264699802ec3
Create Date: 2024-03-06 00:18:57.968757

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "7fbcdb262a79"
down_revision: Union[str, None] = "264699802ec3"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade():
    # Widen the column so longer password-hash formats fit.
    op.alter_column(
        "users", "password_hash", existing_type=sa.String(128), type_=sa.String(256)
    )


def downgrade():
    # NOTE(review): shrinking back may fail if hashes longer than 128
    # chars already exist — behavior is database-dependent.
    op.alter_column(
        "users", "password_hash", existing_type=sa.String(256), type_=sa.String(128)
    )
32 |
--------------------------------------------------------------------------------
/alembic/versions/264699802ec3_create_users_table.py:
--------------------------------------------------------------------------------
"""create users table

Revision ID: 264699802ec3
Create Date: 2024-03-03 18:25:23.656599

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "264699802ec3"
down_revision: Union[str, None] = None  # root migration of this history
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade():
    op.create_table(
        "users",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("email", sa.String(120), unique=True, nullable=False),
        sa.Column("login", sa.String(64), unique=True, nullable=False),
        # Widened to 256 later by migration 7fbcdb262a79.
        sa.Column("password_hash", sa.String(128), nullable=False),
    )


def downgrade():
    op.drop_table("users")
33 |
--------------------------------------------------------------------------------
/alembic/versions/e3fab275bece_add_chat_history_table.py:
--------------------------------------------------------------------------------
"""add chat_history table

Revision ID: e3fab275bece
Revises: 7fbcdb262a79
Create Date: 2024-05-13 07:16:11.484875

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "e3fab275bece"
down_revision: Union[str, None] = "7fbcdb262a79"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade():
    # Unlengthed sa.String maps to an unbounded varchar/text on Postgres.
    op.create_table(
        "chat_history",
        sa.Column("message_id", sa.Integer, primary_key=True),
        sa.Column("conversation_id", sa.Integer),
        sa.Column("user_message", sa.String),
        sa.Column("current_context", sa.String),
        sa.Column("answer", sa.String),
    )


def downgrade():
    op.drop_table("chat_history")
35 |
--------------------------------------------------------------------------------
/api/database/db_manager.py:
--------------------------------------------------------------------------------
1 | from api.database.models.chat_history import ChatHistory
2 | from api.extensions import db
3 |
4 |
class DBManager:
    """Thin data-access layer over the chat_history table."""

    def save_message(self, conversation_id, user_message, current_context, answer):
        """Persist one question/answer exchange and commit immediately."""
        record = ChatHistory(
            conversation_id=conversation_id,
            user_message=user_message,
            current_context=current_context,
            answer=answer,
        )
        session = db.session
        session.add(record)
        session.commit()

    def get_messages_by_conversation(self, conversation_id):
        """Return all ChatHistory rows belonging to *conversation_id*."""
        query = db.session.query(ChatHistory).filter(
            ChatHistory.conversation_id == conversation_id
        )
        return query.all()

    def get_current_conversation_id(self):
        """Return the conversation id of the newest message, or 1 if the table is empty."""
        newest = (
            db.session.query(ChatHistory)
            .order_by(ChatHistory.message_id.desc())
            .first()
        )
        if newest is None:
            return 1
        return newest.conversation_id
27 |
--------------------------------------------------------------------------------
/docs/scripts.py:
--------------------------------------------------------------------------------
# To run the script you need to execute bash in a running api container,
# create a file, paste the content using the cat command (https://stackoverflow.com/a/60224966)
# cat > file_to_edit
# 1 Write or paste your text
# 2 don't forget to leave a blank line at the end of file
# 3 Ctrl + C to apply configuration
# and run the script
from api.run import app
from api.extensions import db
from api.database.models.user import User
from werkzeug.security import generate_password_hash


def create_admin_user(email: str, login: str, password: str) -> None:
    """Insert a new user row, storing only a salted werkzeug password hash."""
    hashed_password = generate_password_hash(password)
    new_user = User(email=email, login=login, password_hash=hashed_password)
    db.session.add(new_user)
    db.session.commit()


if __name__ == "__main__":
    with app.app_context():  # Create an application context
        email = input("Enter email: ")
        login = input("Enter login: ")
        password = input("Enter password: ")
        create_admin_user(email, login, password)
        print("User created")
28 |
--------------------------------------------------------------------------------
/alembic/versions/0f632f48bc6d_add_functions_history_table.py:
--------------------------------------------------------------------------------
"""add functions_history table

Revision ID: 0f632f48bc6d
Revises: e62d40842589
Create Date: 2024-06-15 13:14:00.332058

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "0f632f48bc6d"
down_revision: Union[str, None] = "e62d40842589"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # The "context" column is added later by migration b6994a6fb482.
    op.create_table(
        "functions_history",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("interaction_id", sa.Integer, nullable=False),
        sa.Column("function", sa.String(255), nullable=False),
        sa.Column("user_input", sa.Text, nullable=False),
        sa.Column("answer", sa.Text, nullable=False),
        sa.Column(
            "created_at", sa.DateTime, nullable=False, server_default=sa.func.now()
        ),
    )


def downgrade() -> None:
    op.drop_table("functions_history")
38 |
--------------------------------------------------------------------------------
/api/routes/login_view.py:
--------------------------------------------------------------------------------
from flask import jsonify, request
from flask.views import MethodView
from werkzeug.security import check_password_hash
from flask_jwt_extended import create_access_token
from datetime import timedelta
from api.database.models.user import User


class LoginView(MethodView):
    """Issue a 30-minute JWT access token for valid email/password credentials."""

    def post(self):
        # Accept credentials from either a JSON body or a form post.
        source = request.json if request.is_json else request.form
        email = source.get("email")
        password = source.get("password")
        # Missing fields previously raised KeyError (HTTP 500); reject cleanly.
        if not email or not password:
            return jsonify("Email and password are required"), 400

        user = User.query.filter_by(email=email).first()
        # check_password_hash is only evaluated when the user row exists.
        if user and check_password_hash(user.password_hash, password):
            expires = timedelta(minutes=30)
            access_token = create_access_token(identity=email, expires_delta=expires)
            return jsonify(
                message="Login Successful",
                access_token=access_token,
                expires_in=expires.total_seconds(),
            )
        # Deliberately vague: do not reveal whether the email exists.
        return jsonify("Bad email or Password"), 401
34 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | assistant_api:
3 | restart: on-failure
4 | build:
5 | context: ./
6 | dockerfile: Dockerfile
7 | depends_on:
8 | - assistant_db
9 | environment:
10 | POSTGRES_URL: ${POSTGRES_URL}
11 | PYTHONPATH: /usr/src/app
12 | assistant_proxy:
13 | restart: on-failure
14 | build:
15 | context: ./nginx
16 | args:
17 | env: prod
18 | dockerfile: Dockerfile
19 | depends_on:
20 | - assistant_api
21 | assistant_db:
22 | image: postgres:latest
23 | restart: on-failure
24 | environment:
25 | POSTGRES_USER: ${POSTGRES_USER}
26 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
27 | POSTGRES_DB: ${POSTGRES_DB}
28 | volumes:
29 | - postgres_data:/var/lib/postgresql/data/
30 | assistant_qdrant:
31 | image: qdrant/qdrant:latest
32 | volumes:
33 | - qdrant_data:/qdrant/storage
34 | assistant_bot:
35 | restart: on-failure
36 | build:
37 | context: ./discord_bot
38 | dockerfile: Dockerfile
39 | volumes:
40 | - assistant_bot_data:/app/data
41 | assistant_test:
42 | build:
43 | context: ./
44 | dockerfile: Dockerfile
45 | command: pytest tests/
46 | environment:
47 | POSTGRES_URL: ${POSTGRES_URL}
48 | PYTHONPATH: /usr/src/app
49 |
50 | volumes:
51 | postgres_data:
52 | qdrant_data:
53 | assistant_bot_data:
--------------------------------------------------------------------------------
/api/routes/check_english_view.py:
--------------------------------------------------------------------------------
from dotenv import load_dotenv
from flask import jsonify, request
from flask.views import MethodView
from flask_jwt_extended import jwt_required
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Load OPENAI_API_KEY (read implicitly by ChatOpenAI) before constructing the model.
load_dotenv()

# temperature=0 keeps corrections deterministic rather than creative.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Few-shot correction prompt; {text} is filled per request by the chain.
system_prompt = """
Adjust the user's text to rectify grammatical, spelling, and punctuation errors, maintaining the original layout.
Interpret ambiguities with discernment. Overlook extraneous comments.
Return only the rectified text!
Examples:
Q: wheres the best place to meet for a quick chat?
A: Where's the best place to meet for a quick chat?
Q: i cant believe its already been a year since we started this project!
A: I can't believe it's already been a year since we started this project!
###
User's text: {text}
"""

prompt = ChatPromptTemplate.from_template(system_prompt)
output_parser = StrOutputParser()
28 |
29 |
class CheckEnglishView(MethodView):
    """POST endpoint that corrects grammar/spelling/punctuation of English text.

    Expects a JSON body ``{"text": "..."}`` and returns ``{"text": corrected}``.
    Requires a valid JWT.
    """

    decorators = [jwt_required()]

    def post(self):
        # silent=True returns None (instead of raising a 400/500) when the
        # body is missing or not JSON; fall back to an empty dict so a bad
        # request degrades to correcting the empty string.
        data = request.get_json(silent=True) or {}
        text = data.get("text", "")

        # LCEL pipeline: prompt -> model -> plain-string output
        chain = prompt | llm | output_parser
        answer = chain.invoke({"text": text})

        return jsonify({"text": answer})
41 |
--------------------------------------------------------------------------------
/alembic/versions/6409f4f8492b_add_resources_table.py:
--------------------------------------------------------------------------------
1 | """add resources table
2 |
3 | Revision ID: 6409f4f8492b
4 | Revises: 265ab9e632ed
5 | Create Date: 2024-06-15 13:17:26.673095
6 |
7 | """
8 |
9 | from typing import Sequence, Union
10 |
11 | from alembic import op
12 | import sqlalchemy as sa
13 |
14 |
15 | # revision identifiers, used by Alembic.
16 | revision: str = "6409f4f8492b"
17 | down_revision: Union[str, None] = "265ab9e632ed"
18 | branch_labels: Union[str, Sequence[str], None] = None
19 | depends_on: Union[str, Sequence[str], None] = None
20 |
21 |
def upgrade() -> None:
    """Create the ``resources`` table."""
    op.create_table(
        "resources",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("content", sa.Text, nullable=True),
        sa.Column("url", sa.String(255), nullable=True),
        sa.Column("tags", sa.String(255), nullable=True),
        sa.Column("category", sa.String(255), nullable=False),
        # server_default so pre-existing rows also get a value
        sa.Column("active", sa.Boolean, nullable=False, server_default="true"),
        sa.Column(
            "created_at", sa.DateTime, nullable=False, server_default=sa.func.now()
        ),
        sa.Column(
            "updated_at",
            sa.DateTime,
            nullable=False,
            server_default=sa.func.now(),
            # NOTE(review): onupdate is a client-side SQLAlchemy default and has
            # no effect in DDL emitted by a migration - confirm updates go
            # through SQLAlchemy or add a DB trigger.
            onupdate=sa.func.now(),
        ),
    )


def downgrade() -> None:
    """Drop the ``resources`` table, reversing :func:`upgrade`."""
    op.drop_table("resources")
47 |
--------------------------------------------------------------------------------
/alembic/versions/265ab9e632ed_add_personal_memory_table.py:
--------------------------------------------------------------------------------
1 | """add personal_memory table
2 |
3 | Revision ID: 265ab9e632ed
4 | Revises: 0f632f48bc6d
5 | Create Date: 2024-06-15 13:15:35.943222
6 |
7 | """
8 |
9 | from typing import Sequence, Union
10 |
11 | from alembic import op
12 | import sqlalchemy as sa
13 |
14 |
15 | # revision identifiers, used by Alembic.
16 | revision: str = "265ab9e632ed"
17 | down_revision: Union[str, None] = "0f632f48bc6d"
18 | branch_labels: Union[str, Sequence[str], None] = None
19 | depends_on: Union[str, Sequence[str], None] = None
20 |
21 |
def upgrade() -> None:
    """Create the ``personal_memory`` table."""
    op.create_table(
        "personal_memory",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String(255), nullable=False),
        sa.Column("description", sa.Text, nullable=True),
        sa.Column("source", sa.String(255), nullable=True),
        sa.Column("category", sa.String(255), nullable=False),
        sa.Column("tags", sa.String(255), nullable=True),
        sa.Column(
            "created_at", sa.DateTime, nullable=False, server_default=sa.func.now()
        ),
        sa.Column(
            "updated_at",
            sa.DateTime,
            nullable=False,
            server_default=sa.func.now(),
            # NOTE(review): onupdate is a client-side SQLAlchemy default and has
            # no effect in DDL emitted by a migration - confirm updates go
            # through SQLAlchemy or add a DB trigger.
            onupdate=sa.func.now(),
        ),
        # server_default so pre-existing rows also get a value
        sa.Column("active", sa.Boolean, nullable=False, server_default="true"),
    )


def downgrade() -> None:
    """Drop the ``personal_memory`` table, reversing :func:`upgrade`."""
    op.drop_table("personal_memory")
47 |
--------------------------------------------------------------------------------
/alembic/versions/e62d40842589_add_knowledge_base_table.py:
--------------------------------------------------------------------------------
1 | """add knowledge_base table
2 |
3 | Revision ID: e62d40842589
4 | Revises: e3fab275bece
5 | Create Date: 2024-06-15 13:09:52.554254
6 |
7 | """
8 |
9 | from typing import Sequence, Union
10 |
11 | from alembic import op
12 | import sqlalchemy as sa
13 |
14 |
15 | # revision identifiers, used by Alembic.
16 | revision: str = "e62d40842589"
17 | down_revision: Union[str, None] = "e3fab275bece"
18 | branch_labels: Union[str, Sequence[str], None] = None
19 | depends_on: Union[str, Sequence[str], None] = None
20 |
21 |
def upgrade() -> None:
    """Create the ``knowledge_base`` table."""
    op.create_table(
        "knowledge_base",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("category", sa.String(255), nullable=False),
        sa.Column("tag", sa.String(255), nullable=False),
        sa.Column("content", sa.Text, nullable=False),
        sa.Column("source", sa.String(255), nullable=True),
        sa.Column(
            "created_at", sa.DateTime, nullable=False, server_default=sa.func.now()
        ),
        sa.Column(
            "updated_at",
            sa.DateTime,
            nullable=False,
            server_default=sa.func.now(),
            # NOTE(review): onupdate is a client-side SQLAlchemy default and has
            # no effect in DDL emitted by a migration - confirm updates go
            # through SQLAlchemy or add a DB trigger.
            onupdate=sa.func.now(),
        ),
        sa.Column("last_accessed_at", sa.DateTime, nullable=True),
        # server_default so pre-existing rows also get a value
        sa.Column("active", sa.Boolean, nullable=False, server_default="true"),
    )


def downgrade() -> None:
    """Drop the ``knowledge_base`` table, reversing :func:`upgrade`."""
    op.drop_table("knowledge_base")
47 |
--------------------------------------------------------------------------------
/docs/todo.md:
--------------------------------------------------------------------------------
1 | # To Do List/Backlog for the project
2 | - [x] project structure and initial setup
3 | - [x] dockerize the app
4 | - [x] add db migrations mechanism
5 | - [x] add unit tests
6 | - [x] add token-based authentication (JWT)
7 | - [x] add test user and login endpoint
8 | - [x] add langchain
9 | - [x] add qdrant
10 | - [x] create conversation bot foundation
11 | - [x] configure CI/CD - github actions
12 | - [x] rethink whole CI/CD workflow once again!
13 | - [x] add discord bot
14 | - [x] add basic option to talk with bot using model
15 | - [x] YT video summary in English
16 | - [] longterm memory and personalization RAG
17 | - [] add functionality to create bookmarks (yt-videos, articles, etc.)
18 | - [] add integration with apple watch using shortcuts to hit endpoints
19 | - [] google search endpoint
20 | - [] test different models
21 | - [] anthropic claude/haiku
22 | - [] groq
23 |
24 | # Nice to have/do
25 | - [] change the used library from requests to aiohttp to allow async requests and improve performance
26 | - [] add types to the project
27 | - [x] add langsmith support
28 | - [] try what can be achieved with gpt-4o
29 | - [] refactor the code to avoid duplicates
30 | - [] play with agents approach
31 | - [] check cloudflare workers
32 | - [] check test containers (https://testcontainers.com/, https://www.docker.com/blog/local-development-of-go-applications-with-testcontainers/)
33 | - [] check idea of an Obsidian vault as a knowledge base
34 |
35 | # Bugs
36 | - [] fix issue with the API not working after some time: no requests are being processed and the bot is throwing an error about a missing summary in the response. Add handling for such errors and try to fix that issue
37 | - [] check why the bot container starts automatically when the docker engine is started
38 | - [] no login attempt before /commands, so there is error in case of no valid token
39 |
--------------------------------------------------------------------------------
/tests/test_assistant_api.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from api.run import app
3 |
4 |
@pytest.fixture
def client():
    """Yield a Flask test client for the app with TESTING enabled."""
    app.config["TESTING"] = True
    with app.test_client() as client:
        yield client
10 |
11 |
def login(client, email, password):
    """POST the given credentials to the /login endpoint and return the response."""
    payload = {"email": email, "password": password}
    return client.post(
        "/login", json=payload, headers={"Content-Type": "application/json"}
    )
16 |
17 |
def chat(client, message, headers):
    """POST *message* to the /chat endpoint using the supplied headers."""
    return client.post("/chat", json={"message": message}, headers=headers)
21 |
22 | # TODO: write a test without using the hardcoded credentials
23 | # def test_login_route(client):
24 | # response = login(client, "test@test.com", "test1")
25 | # assert response.status_code == 200
26 | # assert response.json["message"] == "Login Successful"
27 | # assert "access_token" in response.json
28 | # assert response.json["expires_in"] == 1800
29 |
30 |
def test_login_route_with_incorrect_password(client):
    # Invalid credentials must be rejected with 401 and must not leak a token.
    response = login(client, "wrong@email.com", "wrong_password")
    assert response.status_code == 401
    assert "access_token" not in response.json
35 |
36 |
37 | # def test_login_route_with_incorrect_email(client):
38 | # response = login(client, "test1@test.com", "test1")
39 | # assert response.status_code == 401
40 | # assert "access_token" not in response.json
41 |
42 | # def test_access_chat_route_with_valid_token(client):
43 | # login_response = login(client, "test@test.com", "test1")
44 | # valid_token = login_response.json["access_token"]
45 | # headers = {
46 | # "Content-Type": "application/json",
47 | # "Authorization": f"Bearer {valid_token}",
48 | # }
49 | # chat_response = chat(client, "hello", headers)
50 | # assert chat_response.status_code == 200
51 |
--------------------------------------------------------------------------------
/docs/jupyter_notebooks/ollama.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# source: https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md\n",
10 | "\n",
11 | "from langchain_community.llms import Ollama\n",
12 | "from langchain_community.document_loaders import WebBaseLoader\n",
13 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
14 | "from langchain_community.embeddings import OllamaEmbeddings\n",
15 | "from langchain_community.vectorstores import Chroma\n",
16 | "from langchain.chains import RetrievalQA\n",
17 | "\n",
18 | "ollama = Ollama(base_url='http://localhost:11434', model=\"llama2\")\n",
19 | "\n",
20 | "loader = WebBaseLoader(\"https://www.gutenberg.org/files/1727/1727-h/1727-h.htm\")\n",
21 | "data = loader.load()\n",
22 | "\n",
23 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
24 | "all_splits = text_splitter.split_documents(data)\n",
25 | "\n",
26 | "oembed = OllamaEmbeddings(base_url=\"http://localhost:11434\", model=\"nomic-embed-text\")\n",
27 |     "# store the embeddings locally - maybe qdrant?\n",
28 | "vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)\n",
29 | "\n",
30 | "question = \"Who is Neleus and who is in Neleus' family?\"\n",
31 | "print(question)\n",
32 | "docs = vectorstore.similarity_search(question)\n",
33 | "\n",
34 | "qachain = RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())\n",
35 | "print(qachain.invoke({\"query\": question}))"
36 | ]
37 | }
38 | ],
39 | "metadata": {
40 | "kernelspec": {
41 | "display_name": "jupyter",
42 | "language": "python",
43 | "name": "python3"
44 | },
45 | "language_info": {
46 | "name": "python",
47 | "version": "3.11.7"
48 | }
49 | },
50 | "nbformat": 4,
51 | "nbformat_minor": 2
52 | }
53 |
--------------------------------------------------------------------------------
/discord_bot/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 |
4 |
class Config:
    """Runtime configuration and mutable session state for the Discord bot.

    Reads DISCORD_TOKEN and DISCORD_GUILD_ID from the environment and raises
    ``ValueError`` when either is missing.

    The previous implementation wrapped every field in trivial
    property/setter pairs; plain attributes expose the identical interface
    (``config.api_token`` reads/writes work unchanged) with less boilerplate.
    """

    def __init__(self):
        # API session state, populated after a successful login against the API
        self.api_token = None
        self.api_token_expires_at = None
        # State of the conversation currently relayed to the API
        self.current_conversation_id = None
        self.current_conversation_last_message_timestamp = None
        self.discord_token = os.getenv("DISCORD_TOKEN")
        self.discord_guild_id = os.getenv("DISCORD_GUILD_ID")
        # chat, chat-testing
        self.chatting_channels_ids = [1238228569021349948, 1238223813997756446]

        if not self.discord_token:
            raise ValueError("DISCORD_TOKEN is not set")

        if not self.discord_guild_id:
            raise ValueError("DISCORD_GUILD_ID is not set")

    def is_token_valid(self):
        """Return True when an API token exists and its expiry is in the future."""
        return (
            self.api_token is not None
            and self.api_token_expires_at is not None
            and self.api_token_expires_at > datetime.now()
        )
60 |
--------------------------------------------------------------------------------
/docs/feature_ideas.md:
--------------------------------------------------------------------------------
1 | # Assistant feature ideas
2 | Assistant should be available through various interfaces.
3 | The main one would be a Discord bot - a server with multiple channels for different purposes.
4 | Additionally it should be available through a voice assistant - using watch/phone commands + shortcuts
5 |
6 | ## Basic functionalities
7 | - conversation bot - chat like experience, ask about anything
8 | - enrich the answers with google search - from default or executed on demand
9 | - brave https://brave.com/search/api/
10 | - duckduckgo
11 | - serp api
12 | - long term memory - use information from the knowledge base and info about the user to enrich prompts and make answers more accurate
13 | - config file with the basic info about the user to give the context
14 | - feature to save info to long term memory
15 | - feature to retrieve info from long term memory
16 | - useful functions (chat modes)
17 | - prompts that can be used in various situations e.g. correct grammar, wording, translate into different languages
18 | - prompts for creating code - https://qdrant.tech/documentation/tutorials/code-search/
19 | - create a day summary based on the calendar events
20 | - remind about different things based on the created events
21 | - feature to create reminder - remind me about sending that email in 30 minutes
22 | - feature to create recurring reminders - remind me to stand up every 30 minutes
23 | - save notes/quotes from books, articles, etc.
24 | - save notes using chat or voice
25 | - save it with tags, so it can be easily categorized and then retrieved
26 | - daily summary (readwise like) - send a selected quote or note from the list and present it in the chat
27 | - feature to retrieve notes using tags
28 |
29 | ### Feature ideas for the future
30 | - basic app instead of discord server - streamlit app?
31 | - mode for creative ideas discussion, brainstorming, problem solving - agent like?
32 | - help creating notes on content consumed (books, articles, videos, podcasts, films, series etc.)
33 | - summarize yt video
34 | - summarize podcast or article
--------------------------------------------------------------------------------
/docs/jupyter_notebooks/web_summary.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from langchain_community.document_loaders import WebBaseLoader\n",
10 | "\n",
11 | "URL = \"https://www.kalzumeus.com/2011/10/28/dont-call-yourself-a-programmer/\"\n",
12 | "\n",
13 | "JINA_PREFIX = \"https://r.jina.ai/\"\n",
14 | "\n",
15 | "loader = WebBaseLoader(URL)\n",
16 | "data = loader.load()"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 20,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "import requests\n",
26 | "import json\n",
27 | "\n",
28 | "response = requests.get(JINA_PREFIX + URL, headers={\"Accept\": \"application/json\"})\n",
29 | "response_json = json.loads(response.text)\n",
30 | "response_json_content = response_json[\"data\"][\"content\"]\n",
31 | "metadata = {\"title\": response_json[\"data\"][\"title\"],\n",
32 | " \"url\": response_json[\"data\"][\"url\"]}"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 22,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "{'title': \"Don't Call Yourself A Programmer, And Other Career Advice\", 'url': 'https://www.kalzumeus.com/2011/10/28/dont-call-yourself-a-programmer/'}\n"
45 | ]
46 | }
47 | ],
48 | "source": [
49 | "# print(response.text)\n",
50 | "print(metadata)\n",
51 | "# print(data)"
52 | ]
53 | }
54 | ],
55 | "metadata": {
56 | "kernelspec": {
57 | "display_name": "jupyter",
58 | "language": "python",
59 | "name": "python3"
60 | },
61 | "language_info": {
62 | "codemirror_mode": {
63 | "name": "ipython",
64 | "version": 3
65 | },
66 | "file_extension": ".py",
67 | "mimetype": "text/x-python",
68 | "name": "python",
69 | "nbconvert_exporter": "python",
70 | "pygments_lexer": "ipython3",
71 | "version": "3.11.7"
72 | }
73 | },
74 | "nbformat": 4,
75 | "nbformat_minor": 2
76 | }
77 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |

2 |
3 | # Personal Assistant
4 | A virtual personal assistant designed to simplify your daily tasks. The assistant can be accessed through the Command Line Interface (CLI) or as a Discord bot. With the assistance of Generative AI, it automates daily activities, making your life more organized and easier.
5 |
6 | Project built for the [100 commitów](https://100commitow.pl/) competition. Its goal is to develop an open source project for 100 days.
7 |
8 | ## Technologies
9 | [](https://skillicons.dev)
10 |
11 | ## Architecture diagram
12 | 
13 |
14 | ## How to run?
15 | You only need to have docker and docker-compose installed on your machine. Then you can run the following commands:
16 | 1. Clone the repository: ```git clone https://github.com/janbanot/personal_assistant.git```
17 | 2. Create an image with a python virtual environment and install all the dependencies: ```bash env.sh```
18 | 3. Build the project ```bash build.sh```
19 | - Build the project using local docker-compose settings ```bash build.sh --local```
20 |
21 | ## Current state of the project
22 | ### Project architecture
23 | - [x] Dockerized environment
24 | - [x] CI/CD pipeline
25 | - [x] test framework
26 | - [x] the first basic tests are implemented.
27 |
28 | ### API
29 | - [x] token-based authentication and login process
30 | - [x] endpoint to communicate with the assistant
31 | - [x] endpoint to get YT video summary
32 | - [x] endpoint to get page summary
33 |
34 | ### Discord bot
35 | - [x] bot configuration
36 | - [x] command to communicate with the assistant
37 | - [x] command to get YT video summary
38 | - [x] command to get page summary
39 | - [x] command to fix English text
40 |
41 | ### Next steps:
42 | - Improve chatbot functionalities
43 | - Add more commands with prompts covering frequent use cases
44 | - Add memory to the chatbot (information about the user, context of the conversation, possibility to save data)
45 |
46 | ## Read more about the project
47 | - [feature ideas](docs/feature_ideas.md)
48 | - [todo list](docs/todo.md)
49 |
--------------------------------------------------------------------------------
/api/routes/yt_summary_view.py:
--------------------------------------------------------------------------------
1 | from flask import request, jsonify, current_app
2 | from flask.views import MethodView
3 | from flask_jwt_extended import jwt_required
4 | from dotenv import load_dotenv
5 | from langchain_core.prompts import PromptTemplate
6 | from langchain_community.document_loaders import YoutubeLoader
7 | from langchain_openai import ChatOpenAI
8 | from langchain.chains.summarize import load_summarize_chain
9 | from langchain.text_splitter import RecursiveCharacterTextSplitter
10 |
11 | load_dotenv()
12 |
13 |
class YTSummaryView(MethodView):
    """POST endpoint returning a bullet-point summary of a YouTube transcript.

    Expects a JSON body ``{"url": "<youtube url>"}`` and returns
    ``{"summary": "..."}``; invalid/unloadable URLs yield a 400 error payload.
    Requires a valid JWT.
    """

    decorators = [jwt_required()]

    def post(self):
        # silent=True avoids a 500 when the body is missing or not JSON
        data = request.get_json(silent=True) or {}
        url = data.get("url", "")

        current_app.logger.info("Request: %s", request)
        current_app.logger.info("URL: %s", url)

        try:
            loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
            results = loader.load()
        except Exception as e:
            # BUG fix: a failed load previously fell through with `loader`
            # unbound and crashed with NameError (HTTP 500); report a client
            # error instead of just printing.
            current_app.logger.error("Invalid YouTube URL: %s. Error: %s", url, e)
            return jsonify({"error": f"Could not load YouTube video: {url}"}), 400

        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=500)

        # BUG fix: the old loop overwrote `text_content` on every iteration,
        # so only the LAST returned document was summarized; join them all.
        text_content = "\n".join(document.page_content for document in results)

        docs = text_splitter.create_documents([text_content])

        # Per-chunk ("map") prompt of the map-reduce summarization chain.
        map_prompt = """
        Write a concise summary of the following:
        "{text}"
        CONCISE SUMMARY:
        """

        map_prompt_template = PromptTemplate(
            template=map_prompt, input_variables=["text"]
        )

        # "Combine" prompt that merges the per-chunk summaries.
        # (Fixed a stray fourth quote that leaked a literal `"` into the prompt.)
        summary_combine_prompt = """
        Write detailed and comprehensive summary of the video transcript text.
        The summary should cover the main points and key details of the text.
        Return your response in bullet points.
        ```{text}```
        BULLET POINT SUMMARY:
        """

        summary_combine_prompt_template = PromptTemplate(
            template=summary_combine_prompt, input_variables=["text"]
        )

        summary_chain = load_summarize_chain(
            llm=llm,
            chain_type="map_reduce",
            map_prompt=map_prompt_template,
            combine_prompt=summary_combine_prompt_template,
            # verbose=True
        )

        summary_output = summary_chain.run(docs)

        # TODO: add option to ask question about the video, to extend a point from summary, etc.
        return jsonify({"summary": summary_output})
73 |
--------------------------------------------------------------------------------
/api/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import logging
4 | from flask import Flask
5 | from api.extensions import db, ma, jwt
6 | from api.routes.login_view import LoginView
7 | from api.routes.chat_view import ChatView
8 | from api.routes.clear_context_view import ClearView
9 | from api.routes.test_view import TestView
10 | from api.routes.yt_summary_view import YTSummaryView
11 | from api.routes.check_english_view import CheckEnglishView
12 | from api.routes.web_page_summary_view import WebPageSummaryView
13 | from api.routes.db_conversation_id_view import DBConversationIdView
14 |
15 |
def create_app():
    """Application factory: build and configure the Flask app.

    Wires the database, marshmallow and JWT extensions, registers every
    HTTP route, and configures stdout logging.
    """
    app = Flask(__name__)
    app.secret_key = os.getenv("FLASK_SECRET_KEY", "this is a secret key")
    app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv("POSTGRES_URL", "")

    # Initialize the instances with the app
    db.init_app(app)
    ma.init_app(app)
    jwt.init_app(app)

    # Table-driven route registration: (rule, view function, allowed methods)
    routes = [
        ("/", TestView.as_view("test_view"), ["GET"]),
        ("/login", LoginView.as_view("login_view"), ["POST"]),
        ("/chat", ChatView.as_view("chat_view"), ["POST"]),
        ("/clear-context", ClearView.as_view("clear_view"), ["POST"]),
        ("/yt-summary", YTSummaryView.as_view("yt_summary_view"), ["POST"]),
        ("/check-english", CheckEnglishView.as_view("check_english_view"), ["POST"]),
        ("/page-summary", WebPageSummaryView.as_view("page_summary_view"), ["POST"]),
        (
            "/db/conversation-id",
            DBConversationIdView.as_view("db_conversation_id_view"),
            ["GET"],
        ),
    ]
    for rule, view_func, methods in routes:
        app.add_url_rule(rule, view_func=view_func, methods=methods)

    # Configure logging to stdout so container logs capture everything
    logging.basicConfig(
        level=logging.DEBUG, handlers=[logging.StreamHandler(sys.stdout)]
    )
    app.logger.info("Starting application")

    return app
91 |
--------------------------------------------------------------------------------
/alembic/env.py:
--------------------------------------------------------------------------------
1 | from logging.config import fileConfig
2 |
3 | from sqlalchemy import engine_from_config
4 | from sqlalchemy import pool
5 |
6 | from alembic import context
7 |
8 | from dotenv import load_dotenv
9 |
10 | import os
11 |
12 | load_dotenv()
13 |
14 | # this is the Alembic Config object, which provides
15 | # access to the values within the .ini file in use.
16 | config = context.config
17 |
18 | # Replace the sqlalchemy.url value with the one from .env file
19 | database_url = os.getenv('POSTGRES_URL')
20 |
21 | if not database_url:
22 | raise ValueError('POSTGRES_URL environment variable not set')
23 |
24 | config.set_main_option('sqlalchemy.url', database_url)
25 |
26 | # Interpret the config file for Python logging.
27 | # This line sets up loggers basically.
28 | if config.config_file_name is not None:
29 | fileConfig(config.config_file_name)
30 |
31 | # add your model's MetaData object here
32 | # for 'autogenerate' support
33 | # from myapp import mymodel
34 | # target_metadata = mymodel.Base.metadata
35 | target_metadata = None # type: ignore
36 |
37 | # other values from the config, defined by the needs of env.py,
38 | # can be acquired:
39 | # my_important_option = config.get_main_option("my_important_option")
40 | # ... etc.
41 |
42 |
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.

    """
    # Engine options come from the ini section, with sqlalchemy.url already
    # overridden from the POSTGRES_URL environment variable above.
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,  # migrations are one-shot; no pooling needed
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()


# Alembic selects offline mode for SQL-script generation (e.g. `upgrade --sql`).
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
93 |
--------------------------------------------------------------------------------
/api/routes/web_page_summary_view.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | from flask import request, jsonify, current_app
4 | from flask.views import MethodView
5 | from flask_jwt_extended import jwt_required
6 | from dotenv import load_dotenv
7 | from langchain_openai import ChatOpenAI
8 | from langchain_core.prompts import PromptTemplate
9 | from langchain.text_splitter import RecursiveCharacterTextSplitter
10 | from langchain.chains.summarize import load_summarize_chain
11 |
12 | load_dotenv()
13 |
14 |
class WebPageSummaryView(MethodView):
    """POST endpoint that fetches a web page via the Jina reader and summarizes it.

    Expects a JSON body ``{"url": "<page url>"}`` and returns
    ``{"summary": "..."}``; fetch failures yield a 400 error payload.
    Requires a valid JWT.
    """

    decorators = [jwt_required()]

    def post(self):
        # Jina "reader" service returns the page as clean JSON/text
        JINA_PREFIX = "https://r.jina.ai/"

        # silent=True avoids a 500 when the body is missing or not JSON
        data = request.get_json(silent=True) or {}
        url = data.get("url", "")

        current_app.logger.info("Request: %s", request)
        current_app.logger.info("URL: %s", url)

        try:
            # timeout keeps a slow upstream from hanging the worker forever
            response = requests.get(
                JINA_PREFIX + url,
                headers={"Accept": "application/json"},
                timeout=30,
            )
            response.raise_for_status()
        except Exception as e:
            # BUG fix: a failed request previously fell through with
            # `response` unbound and crashed with NameError (HTTP 500);
            # report a client error instead of just printing.
            current_app.logger.error("Invalid URL: %s. Error: %s", url, e)
            return jsonify({"error": f"Could not fetch page: {url}"}), 400

        response_json = response.json()
        text_content = response_json["data"]["content"]
        # metadata = {"title": response_json["data"]["title"], "url": response_json["data"]["url"]}

        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

        text_splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500
        )

        docs = text_splitter.create_documents([text_content])

        # TODO: rewrite to share code with yt_summary_view
        # Per-chunk ("map") prompt of the map-reduce summarization chain.
        map_prompt = """
        Write a concise summary of the following:
        "{text}"
        CONCISE SUMMARY:
        """

        map_prompt_template = PromptTemplate(
            template=map_prompt, input_variables=["text"]
        )

        # "Combine" prompt that merges the per-chunk summaries.
        # (Fixed a stray fourth quote that leaked a literal `"` into the prompt.)
        summary_combine_prompt = """
        Write detailed and comprehensive summary of the article.
        The summary should cover the main points and key details of the text.
        Return your response in bullet points.
        ```{text}```
        BULLET POINT SUMMARY:
        """

        summary_combine_prompt_template = PromptTemplate(
            template=summary_combine_prompt, input_variables=["text"]
        )

        summary_chain = load_summarize_chain(
            llm=llm,
            chain_type="map_reduce",
            map_prompt=map_prompt_template,
            combine_prompt=summary_combine_prompt_template,
            # verbose=True
        )

        summary_output = summary_chain.run(docs)

        # TODO: add option to ask question about the text, to extend a point from summary, etc.
        return jsonify({"summary": summary_output})
79 |
--------------------------------------------------------------------------------
/api/routes/chat_view.py:
--------------------------------------------------------------------------------
1 | from flask import request, jsonify
2 | from flask.views import MethodView
3 | from flask_jwt_extended import jwt_required
4 | from dotenv import load_dotenv
5 | from datetime import datetime
6 | from dateutil import tz # type: ignore
7 | from langchain_openai import ChatOpenAI
8 | from langchain.prompts.prompt import PromptTemplate
9 | from langchain.chains import ConversationChain
10 | from langchain.memory import ConversationSummaryMemory
11 | from api.database.db_manager import DBManager
12 |
# TODO: try to implement it without using the langchain directly
# Load OPENAI_API_KEY and friends from .env before any model is created.
load_dotenv()

# Rolling summary of the conversation. Module-level state: one shared
# context for every request handled by this process.
context_memory = ConversationSummaryMemory(llm=ChatOpenAI(), ai_prefix="Assistant")
# TODO: refactor to use the same instance of the model
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# NOTE(review): this timestamp is computed once at import time, so the
# "Current datetime" injected into the prompt goes stale while the process
# runs - consider computing it per request.
current_datetime = datetime.now()
gmt_plus_2_tz = tz.gettz('Etc/GMT-2')
current_datetime = current_datetime.astimezone(gmt_plus_2_tz)
current_datetime_str = current_datetime.strftime('%Y/%m/%d, %H:%M:%S')

datetime_string = f"Current datetime: {current_datetime_str} \n"

template = datetime_string + """
You are an AI assistant designed for ultra-concise, engaging conversations.
Follow these rules:
- Use the fewest words possible while maintaining clarity, impact and natural language
- Keep a friendly, casual tone with occasional colloquialisms
- Always wrap code with triple backticks and keywords with `single backticks`
- Ask for clarification to avoid assumptions
- Detect intentions and emotional states to tailor responses perfectly.
- Focus solely on instructions and provide relevant, comprehensive responses
- Never repeat info or mention limitations
- Simplify complex tasks; provide the best output possible
- Prioritize user needs; tailor responses to their context and goals
- When asked for specific content, start response with requested info immediately
- Continuously improve based on user feedback

Current conversation:
{history}
Human: {input}
AI Assistant:
"""

# TODO: Check why langchain default template example is sent in the context with every message?
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)

# Shared conversation chain combining the prompt, model and summary memory.
conversation = ConversationChain(
    prompt=PROMPT,
    llm=llm,
    verbose=True,
    memory=context_memory,
)
57 |
58 |
class ChatView(MethodView):
    """POST /chat — run a user message through the shared conversation chain."""

    decorators = [jwt_required()]

    def __init__(self):
        self.db_manager = DBManager()

    def post(self):
        """Answer a chat message and persist the exchange.

        Expects JSON of the form {"message": str, "conversation_id": any}.
        Returns JSON {"message": <assistant reply>}.
        """
        payload = request.get_json()
        message = payload.get("message", "")
        conversation_id = payload.get("conversation_id", "")

        # Snapshot the summary memory *before* this turn, so the stored
        # context is exactly what the model saw when producing the reply.
        context_before = context_memory.buffer

        answer = conversation.predict(input=message)

        self.db_manager.save_message(conversation_id, message, context_before, answer)

        return jsonify({"message": answer})
77 |
--------------------------------------------------------------------------------
/docs/notes.md:
--------------------------------------------------------------------------------
# Useful development notes
2 | ### TODO
3 | change to use https://github.com/casey/just
4 | change to use password as variable that is set in the first command and then used in the next ones
5 |
6 | ## DB
7 | - connect to db
8 | ```bash
9 | psql postgresql://USER:PASSWORD@assistant_db:5432/assistant_db
10 | ```
11 |
12 | ## CURLs
13 | - login
14 | ```bash
15 | curl -X POST -H "Content-Type: application/json" -d '{"email": "test@test.com", "password": "test1"}' http://localhost:8081/login
16 | ```
17 |
18 | - endpoint with token
19 | ```bash
20 | curl -X GET -H "Authorization: Bearer ABC" http://localhost:8081/
21 | ```
22 |
23 | - chat
24 | ```bash
25 | curl -X POST -H "Authorization: Bearer ABC" -H "Content-Type: application/json" -d '{"message": "Hello", "conversation_id": -1}' http://localhost:8081/chat
26 | ```
27 |
28 | - save token as variable and use it later
29 | ```bash
30 | TOKEN=$(curl -s -X POST -H "Content-Type: application/json" -d '{"email": "test@test.com", "password": "test1"}' http://localhost:8081/login | jq -r '.access_token')
31 |
32 | curl -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"message": "Hello", "conversation_id": -1}' http://localhost:8081/chat
33 | ```
34 |
35 | - yt_summary endpoint
36 | ```bash
37 | curl -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"url": "https://www.youtube.com/watch?v=YEJUUB1LNFM"}' http://localhost:8081/yt-summary
38 | ```
39 |
40 | - check_english endpoint
41 | ```bash
42 | curl -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{"text": "My name is Susan. Im fourteen and I live in Germany. My hobbies are going to discos, sometimes I listen to music on the radio. In the summer, I go swimming in a lake. I dont have any brothers or sisters. We take buses to school. Im in year 9 at my school. My birthday is on Friday. I hope I will get a new guitar."}' http://localhost:8081/check-english
43 | ```
44 |
45 | ## Debugging
- debug in docker container
47 | - add debugpy fragment to the code
48 | ``` python
49 | import debugpy
50 | debugpy.listen(("0.0.0.0", 5678))
51 | debugpy.wait_for_client()
52 | ```
53 | - in docker-compose expose port 5678
54 | ```yaml
55 | services:
56 | your-service:
57 | ports:
58 | - "5678:5678"
59 | ```
- modify launch.json config
61 | ```json
62 | {
63 | "version": "0.2.0",
64 | "configurations": [
65 | {
66 | "name": "Python: Remote Attach",
67 | "type": "python",
68 | "request": "attach",
69 | "connect": {
70 | "host": "localhost",
71 | "port": 5678
72 | },
73 | "pathMappings": [
74 | {
75 | "localRoot": "${workspaceFolder}",
76 | "remoteRoot": "/app"
77 | }
78 | ]
79 | }
80 | ]
81 | }
82 | ```
83 | - add breakpoint in the code
84 | ```python
85 | debugpy.breakpoint()
86 | ```
87 | - run docker-compose
88 | - attach debugger in VSCode
--------------------------------------------------------------------------------
/alembic.ini:
--------------------------------------------------------------------------------
1 | # A generic, single database configuration.
2 |
3 | [alembic]
4 | # path to migration scripts
5 | script_location = alembic
6 |
7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
8 | # Uncomment the line below if you want the files to be prepended with date and time
9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
10 | # for all available tokens
11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
12 |
13 | # sys.path path, will be prepended to sys.path if present.
14 | # defaults to the current working directory.
15 | prepend_sys_path = .
16 |
17 | # timezone to use when rendering the date within the migration file
18 | # as well as the filename.
19 | # If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
21 | # string value is passed to ZoneInfo()
22 | # leave blank for localtime
23 | # timezone =
24 |
25 | # max length of characters to apply to the
26 | # "slug" field
27 | # truncate_slug_length = 40
28 |
29 | # set to 'true' to run the environment during
30 | # the 'revision' command, regardless of autogenerate
31 | # revision_environment = false
32 |
33 | # set to 'true' to allow .pyc and .pyo files without
34 | # a source .py file to be detected as revisions in the
35 | # versions/ directory
36 | # sourceless = false
37 |
38 | # version location specification; This defaults
39 | # to alembic/versions. When using multiple version
40 | # directories, initial revisions must be specified with --version-path.
41 | # The path separator used here should be the separator specified by "version_path_separator" below.
42 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
43 |
44 | # version path separator; As mentioned above, this is the character used to split
45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47 | # Valid values for version_path_separator are:
48 | #
49 | # version_path_separator = :
50 | # version_path_separator = ;
51 | # version_path_separator = space
52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
53 |
54 | # set to 'true' to search source files recursively
55 | # in each "version_locations" directory
56 | # new in Alembic version 1.10
57 | # recursive_version_locations = false
58 |
59 | # the output encoding used when revision files
60 | # are written from script.py.mako
61 | # output_encoding = utf-8
62 |
63 | sqlalchemy.url = ${POSTGRES_URL}
64 |
65 |
66 | [post_write_hooks]
67 | # post_write_hooks defines scripts or Python functions that are run
68 | # on newly generated revision scripts. See the documentation for further
69 | # detail and examples
70 |
71 | # format using "black" - use the console_scripts runner, against the "black" entrypoint
72 | # hooks = black
73 | # black.type = console_scripts
74 | # black.entrypoint = black
75 | # black.options = -l 79 REVISION_SCRIPT_FILENAME
76 |
77 | # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
78 | # hooks = ruff
79 | # ruff.type = exec
80 | # ruff.executable = %(here)s/.venv/bin/ruff
81 | # ruff.options = --fix REVISION_SCRIPT_FILENAME
82 |
83 | # Logging configuration
84 | [loggers]
85 | keys = root,sqlalchemy,alembic
86 |
87 | [handlers]
88 | keys = console
89 |
90 | [formatters]
91 | keys = generic
92 |
93 | [logger_root]
94 | level = WARN
95 | handlers = console
96 | qualname =
97 |
98 | [logger_sqlalchemy]
99 | level = WARN
100 | handlers =
101 | qualname = sqlalchemy.engine
102 |
103 | [logger_alembic]
104 | level = INFO
105 | handlers =
106 | qualname = alembic
107 |
108 | [handler_console]
109 | class = StreamHandler
110 | args = (sys.stderr,)
111 | level = NOTSET
112 | formatter = generic
113 |
114 | [formatter_generic]
115 | format = %(levelname)-5.5s [%(name)s] %(message)s
116 | datefmt = %H:%M:%S
117 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp==3.9.3
2 | aiosignal==1.3.1
3 | alembic==1.13.1
4 | annotated-types==0.6.0
5 | anyio==4.3.0
6 | appnope==0.1.4
7 | asgiref==3.8.1
8 | asttokens==2.4.1
9 | asyncio==3.4.3
10 | attrs==23.2.0
11 | backoff==2.2.1
12 | bcrypt==4.1.2
13 | beautifulsoup4==4.12.3
14 | blinker==1.7.0
15 | build==1.2.1
16 | cachetools==5.3.3
17 | certifi==2024.2.2
18 | charset-normalizer==3.3.2
19 | chroma-hnswlib==0.7.3
20 | chromadb==0.4.24
21 | click==8.1.7
22 | coloredlogs==15.0.1
23 | comm==0.2.2
24 | dataclasses-json==0.6.4
25 | debugpy==1.8.1
26 | decorator==5.1.1
27 | Deprecated==1.2.14
28 | discord.py==2.3.2
29 | distro==1.9.0
30 | executing==2.0.1
31 | fastapi==0.110.1
32 | filelock==3.13.4
33 | flake8==7.0.0
34 | Flask==3.0.2
35 | Flask-JWT-Extended==4.6.0
36 | flask-marshmallow==1.2.0
37 | Flask-SQLAlchemy==3.1.1
38 | flatbuffers==24.3.25
39 | frozenlist==1.4.1
40 | fsspec==2024.3.1
41 | google-auth==2.29.0
42 | googleapis-common-protos==1.63.0
43 | grpcio==1.62.0
44 | grpcio-tools==1.62.0
45 | gunicorn==21.2.0
46 | h11==0.14.0
47 | h2==4.1.0
48 | hpack==4.0.0
49 | httpcore==1.0.4
50 | httptools==0.6.1
51 | httpx==0.27.0
52 | huggingface-hub==0.22.2
53 | humanfriendly==10.0
54 | hyperframe==6.0.1
55 | idna==3.6
56 | importlib-metadata==7.0.0
57 | importlib_resources==6.4.0
58 | iniconfig==2.0.0
59 | ipython==8.23.0
60 | itsdangerous==2.1.2
61 | jedi==0.19.1
62 | Jinja2==3.1.3
63 | jsonpatch==1.33
64 | jsonpointer==2.4
65 | jupyter_client==8.6.1
66 | jupyter_core==5.7.2
67 | kubernetes==29.0.0
68 | langchain==0.1.16
69 | langchain-community==0.0.32
70 | langchain-core==0.1.42
71 | langchain-openai==0.1.3
72 | langchain-text-splitters==0.0.1
73 | langchainhub==0.1.15
74 | langsmith==0.1.47
75 | Mako==1.3.2
76 | markdown-it-py==3.0.0
77 | MarkupSafe==2.1.5
78 | marshmallow==3.21.0
79 | marshmallow-sqlalchemy==1.0.0
80 | matplotlib-inline==0.1.6
81 | mccabe==0.7.0
82 | mdurl==0.1.2
83 | mmh3==4.1.0
84 | monotonic==1.6
85 | mpmath==1.3.0
86 | multidict==6.0.5
87 | mypy==1.8.0
88 | mypy-extensions==1.0.0
89 | nest-asyncio==1.6.0
90 | numpy==1.26.4
91 | oauthlib==3.2.2
92 | onnxruntime==1.17.3
93 | openai==1.17.1
94 | opentelemetry-api==1.24.0
95 | opentelemetry-exporter-otlp-proto-common==1.24.0
96 | opentelemetry-exporter-otlp-proto-grpc==1.24.0
97 | opentelemetry-instrumentation==0.45b0
98 | opentelemetry-instrumentation-asgi==0.45b0
99 | opentelemetry-instrumentation-fastapi==0.45b0
100 | opentelemetry-proto==1.24.0
101 | opentelemetry-sdk==1.24.0
102 | opentelemetry-semantic-conventions==0.45b0
103 | opentelemetry-util-http==0.45b0
104 | orjson==3.10.0
105 | overrides==7.7.0
106 | packaging==23.2
107 | parso==0.8.4
108 | pexpect==4.9.0
109 | platformdirs==4.2.0
110 | pluggy==1.4.0
111 | portalocker==2.8.2
112 | posthog==3.5.0
113 | prompt-toolkit==3.0.43
114 | protobuf==4.25.3
115 | psutil==5.9.8
116 | psycopg2==2.9.9
117 | ptyprocess==0.7.0
118 | pulsar-client==3.4.0
119 | pure-eval==0.2.2
120 | pyasn1==0.6.0
121 | pyasn1_modules==0.4.0
122 | pycodestyle==2.11.1
123 | pydantic==2.6.3
124 | pydantic_core==2.16.3
125 | pyflakes==3.2.0
126 | Pygments==2.17.2
127 | PyJWT==2.8.0
128 | PyPika==0.48.9
129 | pyproject_hooks==1.0.0
130 | pytest==8.0.2
131 | python-dateutil==2.9.0.post0
132 | python-dotenv==1.0.1
133 | pytube==15.0.0
134 | PyYAML==6.0.1
135 | pyzmq==25.1.2
136 | qdrant-client==1.8.0
137 | regex==2023.12.25
138 | requests==2.31.0
139 | requests-oauthlib==2.0.0
140 | rich==13.7.1
141 | rsa==4.9
142 | shellingham==1.5.4
143 | six==1.16.0
144 | sniffio==1.3.1
145 | soupsieve==2.5
146 | SQLAlchemy==2.0.27
147 | stack-data==0.6.3
148 | starlette==0.37.2
149 | sympy==1.12
150 | tenacity==8.2.3
151 | tiktoken==0.5.2
152 | tokenizers==0.15.2
153 | tornado==6.4
154 | tqdm==4.66.2
155 | traitlets==5.14.2
156 | typer==0.12.3
157 | types-requests==2.31.0.20240218
158 | typing-inspect==0.9.0
159 | typing_extensions==4.10.0
160 | urllib3==2.2.1
161 | uvicorn==0.29.0
162 | uvloop==0.19.0
163 | watchfiles==0.21.0
164 | wcwidth==0.2.13
165 | websocket-client==1.7.0
166 | websockets==12.0
167 | Werkzeug==3.0.1
168 | wrapt==1.16.0
169 | yarl==1.9.4
170 | youtube-transcript-api==0.6.2
171 | zipp==3.18.1
172 |
--------------------------------------------------------------------------------
/.github/workflows/basic.yml:
--------------------------------------------------------------------------------
1 | name: Basic Workflow
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 |
7 | jobs:
8 | setup:
9 | runs-on: self-hosted
10 | environment: production
11 |
12 | steps:
13 | - uses: actions/checkout@v4
14 |
15 | - name: Check for changes in requirements.txt
16 | id: check
17 | run: |
18 | if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep '^requirements.txt$'; then
19 | echo "deps_changed=true" >> $GITHUB_ENV
20 | else
21 | echo "deps_changed=false" >> $GITHUB_ENV
22 | fi
23 |
24 | - name: Run Env
25 | run: bash env.sh
26 | if: env.deps_changed == 'true'
27 |
28 | build:
29 | needs: setup
30 | runs-on: self-hosted
31 | environment: production
32 |
33 | steps:
34 | - uses: actions/checkout@v4
35 |
36 | - name: Create .env files
37 | run: |
38 | echo "FLASK_SECRET_KEY=${{ secrets.FLASK_SECRET_KEY }}" > .env
39 | echo "FLASK_DEBUG_MODE=${{ secrets.FLASK_DEBUG_MODE }}" >> .env
40 | echo "POSTGRES_URL=${{ secrets.POSTGRES_URL }}" >> .env
41 | echo "POSTGRES_USER=${{ secrets.POSTGRES_USER }}" >> .env
42 | echo "POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}" >> .env
43 | echo "POSTGRES_DB=${{ secrets.POSTGRES_DB }}" >> .env
44 | echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> .env
45 | echo "LANGCHAIN_API_KEY=${{ secrets.LANGCHAIN_API_KEY }}" >> .env
46 | echo "LANGCHAIN_TRACING_V2=${{ secrets.LANGCHAIN_TRACING_V2 }}" >> .env
47 | echo "LANGCHAIN_PROJECT=${{ secrets.LANGCHAIN_PROJECT }}" >> .env
48 | echo "LANGCHAIN_ENDPOINT=${{ secrets.LANGCHAIN_ENDPOINT }}" >> .env
49 | mkdir -p ./discord_bot/
50 | echo "DISCORD_TOKEN=${{ secrets.DISCORD_TOKEN }}" > ./discord_bot/.env
51 | echo "DISCORD_GUILD_ID=${{ secrets.DISCORD_GUILD_ID }}" >> ./discord_bot/.env
52 | echo "API_USER_EMAIL=${{ secrets.API_USER_EMAIL }}" >> ./discord_bot/.env
53 | echo "API_PASSWORD=${{ secrets.API_PASSWORD }}" >> ./discord_bot/.env
54 | echo "API_URL=${{ secrets.API_URL }}" >> ./discord_bot/.env
55 |
56 | - name: Run Build
57 | run: |
58 | export GITHUB_RUN_ID=${{ github.run_id }}
59 | bash build.sh
60 |
61 | test:
62 | needs: build
63 | runs-on: self-hosted
64 | environment: production
65 |
66 | steps:
67 | - uses: actions/checkout@v4
68 |
69 | - name: Create .env files
70 | run: |
71 | echo "FLASK_SECRET_KEY=${{ secrets.FLASK_SECRET_KEY }}" > .env
72 | echo "FLASK_DEBUG_MODE=${{ secrets.FLASK_DEBUG_MODE }}" >> .env
73 | echo "POSTGRES_URL=${{ secrets.POSTGRES_URL }}" >> .env
74 | echo "POSTGRES_USER=${{ secrets.POSTGRES_USER }}" >> .env
75 | echo "POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}" >> .env
76 | echo "POSTGRES_DB=${{ secrets.POSTGRES_DB }}" >> .env
77 | echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> .env
78 | echo "LANGCHAIN_API_KEY=${{ secrets.LANGCHAIN_API_KEY }}" >> .env
79 | echo "LANGCHAIN_TRACING_V2=${{ secrets.LANGCHAIN_TRACING_V2 }}" >> .env
80 | echo "LANGCHAIN_PROJECT=${{ secrets.LANGCHAIN_PROJECT }}" >> .env
81 | echo "LANGCHAIN_ENDPOINT=${{ secrets.LANGCHAIN_ENDPOINT }}" >> .env
82 | mkdir -p ./discord_bot/
83 | echo "DISCORD_TOKEN=${{ secrets.DISCORD_TOKEN }}" > ./discord_bot/.env
84 | echo "DISCORD_GUILD_ID=${{ secrets.DISCORD_GUILD_ID }}" >> ./discord_bot/.env
85 | echo "API_USER_EMAIL=${{ secrets.API_USER_EMAIL }}" >> ./discord_bot/.env
86 | echo "API_PASSWORD=${{ secrets.API_PASSWORD }}" >> ./discord_bot/.env
87 | echo "API_URL=${{ secrets.API_URL }}" >> ./discord_bot/.env
88 |
89 | - name: Run Tests
90 | id: tests
91 | run: |
92 | docker-compose up --build --exit-code-from assistant_test assistant_test
93 |
94 | - name: Cleanup Docker images from that run
95 | if: ${{ failure() }}
96 | run: |
97 | export GITHUB_RUN_ID=${{ github.run_id }}
98 | docker stop $(docker ps -a -q -f "label=workflow=$GITHUB_RUN_ID")
99 | docker rm $(docker ps -a -q -f "label=workflow=$GITHUB_RUN_ID")
100 |
101 | - name: Cleanup Test Container
102 | if: always()
103 | run: |
104 | if [ "$(docker ps -a -q -f name=assistant_test)" ]; then
105 | docker-compose stop assistant_test
106 | docker-compose rm -f assistant_test
107 | fi
108 |
109 | - name: Cleanup Unused Images
110 | if: always()
111 | run: docker image prune -f
112 |
--------------------------------------------------------------------------------
/discord_bot/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import requests
3 | import json
4 | from datetime import datetime, timedelta
5 | from dotenv import load_dotenv
6 |
# TODO: refactor the file, because all these will be similar, maybe there is a pattern for this
# TODO: change to use aiohttp instead of requests, so discord command can utilize async

# Pull API credentials and endpoint from the .env file.
load_dotenv()

# TODO: move to config object?
EMAIL = os.getenv("API_USER_EMAIL")
PASSWORD = os.getenv("API_PASSWORD")
# NOTE(review): endpoint paths are concatenated directly (e.g. URL + "chat"),
# so the configured value is assumed to end with a trailing slash — confirm.
URL = os.getenv("API_URL")
16 |
17 |
def get_valid_token(config):
    """Ensure ``config`` holds a valid API token, logging in when it doesn't.

    Returns True when a valid token is available after the optional refresh.
    """
    if config.is_token_valid():
        return True
    login(config)
    return config.is_token_valid()
22 |
23 |
def update_conversation_timestamp(config):
    """Record the current moment as the conversation's last-message time."""
    now = datetime.now()
    config.current_conversation_last_message_timestamp = now
26 |
27 |
def is_new_conversation(config):
    """A conversation counts as new when there is no previous message, or
    the last one is older than 10 minutes."""
    last_seen = config.current_conversation_last_message_timestamp
    if last_seen is None:
        return True
    return datetime.now() - last_seen > timedelta(minutes=10)
34 |
35 |
def conversation_context_handler(config, force_clear=False):
    """Keep the conversation id/timestamp in sync and clear stale context.

    Returns the clear-context API message when ``force_clear`` is set,
    otherwise None.
    """
    if force_clear:
        # Explicit reset requested: start a fresh conversation immediately.
        config.current_conversation_id += 1
        update_conversation_timestamp(config)
        return clear_context(config)

    if not config.current_conversation_id:
        # First message ever seen by this bot instance.
        config.current_conversation_id = 1
        update_conversation_timestamp(config)
        return None

    if is_new_conversation(config):
        # Last message is too old: bump the id and drop the old context.
        config.current_conversation_id += 1
        update_conversation_timestamp(config)
        clear_context(config)
        return None

    update_conversation_timestamp(config)
    return None
52 |
53 |
# TODO: change request to aiohttp
def login(config):
    """Authenticate against the API and cache the bearer token on ``config``.

    On HTTP 200 stores the token and its expiry time (now + ``expires_in``
    seconds from the response); on any other status resets both fields to
    None so ``is_token_valid`` fails and a retry is triggered.
    """
    url = URL + "login"
    headers = {"Content-Type": "application/json"}
    data = {"email": EMAIL, "password": PASSWORD}
    # NOTE(review): no timeout is set — a hung API blocks the caller; consider
    # requests.post(..., timeout=...).
    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        response_data = response.json()
        config.api_token = response_data.get("access_token")
        # assumes the login endpoint always returns "expires_in"; a missing
        # value would make timedelta(seconds=None) raise — TODO confirm
        expires_in = response_data.get("expires_in")
        config.api_token_expires_at = datetime.now() + timedelta(seconds=expires_in)
    else:
        config.api_token = None
        config.api_token_expires_at = None
68 |
69 |
def hello_world(config):
    """Hit the API root endpoint with the stored token; return its JSON body."""
    response = requests.get(
        URL, headers={"Authorization": f"Bearer {config.api_token}"}
    )
    return response.json()
74 |
75 |
def chat(config, message):
    """Send one chat message to the API and return the assistant's reply."""
    payload = {"message": message, "conversation_id": config.current_conversation_id}
    response = requests.post(
        URL + "chat",
        headers={
            "Authorization": f"Bearer {config.api_token}",
            "Content-Type": "application/json",
        },
        data=json.dumps(payload),
    )
    return response.json()["message"]
85 |
86 |
# TODO: refactor to get rid of duplicated code
def clear_context(config):
    """Ask the API to drop the current conversation context; return its message."""
    auth = {"Authorization": f"Bearer {config.api_token}"}
    response = requests.post(URL + "clear-context", headers=auth)
    return response.json()["message"]
93 |
94 |
def yt_summary(config, video_url):
    """Request a summary of the YouTube video at ``video_url`` from the API."""
    response = requests.post(
        URL + "yt-summary",
        headers={
            "Authorization": f"Bearer {config.api_token}",
            "Content-Type": "application/json",
        },
        data=json.dumps({"url": video_url}),
    )
    return response.json()["summary"]
104 |
105 |
def page_summary(config, page_url):
    """Request a summary of the web page at ``page_url`` from the API."""
    response = requests.post(
        URL + "page-summary",
        headers={
            "Authorization": f"Bearer {config.api_token}",
            "Content-Type": "application/json",
        },
        data=json.dumps({"url": page_url}),
    )
    return response.json()["summary"]
115 |
116 |
def check_english(config, text):
    """Submit ``text`` to the grammar-check endpoint; return the corrected text."""
    response = requests.post(
        URL + "check-english",
        headers={
            "Authorization": f"Bearer {config.api_token}",
            "Content-Type": "application/json",
        },
        data=json.dumps({"text": text}),
    )
    return response.json()["text"]
126 |
127 |
def get_conversation_id(config):
    """Fetch the latest conversation id stored by the API backend."""
    auth = {"Authorization": f"Bearer {config.api_token}"}
    response = requests.get(URL + "db/conversation-id", headers=auth)
    return response.json()["conversation_id"]
135 |
--------------------------------------------------------------------------------
/discord_bot/assistant_bot.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import logging
3 | import discord
4 | from discord.ext import commands
5 | from dotenv import load_dotenv
6 | import asyncio
7 | from concurrent.futures import ThreadPoolExecutor
8 | from config import Config
9 | from utils import (
10 | login,
11 | get_valid_token,
12 | chat,
13 | yt_summary,
14 | page_summary,
15 | check_english,
16 | conversation_context_handler,
17 | get_conversation_id
18 | )
19 | from bot_commands import BotCommands, get_bot_commands
20 | from datetime import datetime, timedelta
21 |
load_dotenv()

config = Config()

# Guild (server) the slash-command tree is copied/synced to by /sync.
MY_GUILD = discord.Object(id=config.discord_guild_id)

# message_content is a privileged intent; needed to read users' messages.
intents = discord.Intents.default()
intents.message_content = True

bot = commands.Bot(command_prefix="/", intents=intents)

# Log everything to stdout so container logs capture it.
logging.basicConfig(level=logging.DEBUG, handlers=[logging.StreamHandler(sys.stdout)])
34 |
35 |
@bot.event
async def on_ready():
    """Authenticate with the API and resume from the latest conversation id."""
    login(config)
    # TODO: does it make any sense? (conversation_id endpoint, updating it in config after initialization, etc.)
    config.current_conversation_id = get_conversation_id(config)
    print(f"{bot.user} has connected to Discord!")
43 |
44 |
@bot.event
async def on_message(message):
    """Route plain messages in chat channels to the assistant; ignore bots."""
    author = message.author
    if author == bot.user or author.bot:
        return

    if message.channel.id in config.chatting_channels_ids:
        await handle_bot_chatting(message)

    # Still let prefixed commands run in any channel.
    await bot.process_commands(message)
54 |
55 |
async def handle_bot_chatting(message):
    """Answer a chat message via the API, honouring the '!clear' escape hatch."""
    if not get_valid_token(config):
        await message.channel.send("Could not get API token")
        return

    if message.content.startswith("!clear"):
        response = conversation_context_handler(config, force_clear=True)
    else:
        conversation_context_handler(config)
        response = chat(config, message.content)
    await message.channel.send(response)
66 |
67 |
@bot.command(name="sync", description="Sync commands tree commands")
async def sync_command(ctx: commands.Context):
    # Copy the global app-command tree into the configured guild and push it,
    # so slash commands show up there without waiting for global propagation.
    bot.tree.copy_global_to(guild=MY_GUILD)
    await bot.tree.sync(guild=MY_GUILD)
    await ctx.send("Commands synced!")
73 |
74 |
@bot.command(name="get_messages", description="Get messages from a channel")
async def get_messages(ctx: commands.Context, channel_id: int, days: int):
    """Report how many messages a channel received over the last ``days`` days.

    Sends the count plus the first and last retrieved message; tells the
    user when the channel is unknown or had no traffic in the window.
    """
    channel = bot.get_channel(channel_id)
    if not channel:
        await ctx.send("Channel not found.")
        return

    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=days)

    messages = []
    async for message in channel.history(limit=None, after=start_date, before=end_date):  # type: ignore
        messages.append(f"{message.created_at}: {message.author}: {message.content}")

    await ctx.send(f"Retrieved {len(messages)} messages from the last {days} days.")
    # Bug fix: indexing an empty list raised IndexError when the channel had
    # no messages in the window.
    if messages:
        await ctx.send(f"First message: {messages[0]}")
        await ctx.send(f"Last message: {messages[-1]}")
92 |
93 |
# TODO: refactor to utilize async and remove duplicated code
# TODO: !!!! fix problem with unauthorized error on commands
# handle 401 errors + add handling for errors in api so in bot we can display anything
# add option to ask more questions about the video based on the content
@bot.command(
    name=BotCommands.YT_SUMMARY.value.name,
    description=BotCommands.YT_SUMMARY.value.description,
)
async def yt_summary_command(ctx: commands.Context, url: str):
    """Summarise a YouTube video off the event loop and post the result."""
    with ThreadPoolExecutor() as pool:
        summary = await asyncio.get_event_loop().run_in_executor(
            pool, yt_summary, config, url
        )
    await ctx.send(summary)
107 |
108 |
@bot.command(
    name=BotCommands.PAGE_SUMMARY.value.name,
    description=BotCommands.PAGE_SUMMARY.value.description,
)
async def page_summary_command(ctx: commands.Context, url: str):
    """Summarise a web page off the event loop and post the result."""
    with ThreadPoolExecutor() as pool:
        summary = await asyncio.get_event_loop().run_in_executor(
            pool, page_summary, config, url
        )
    await ctx.send(summary)
118 |
119 |
@bot.command(
    name=BotCommands.CHECK_ENGLISH.value.name,
    description=BotCommands.CHECK_ENGLISH.value.description,
)
async def check_english_command(ctx: commands.Context, *, input_text: str):
    """Send text to the grammar-checking endpoint and reply with the fix.

    NOTE: blocking HTTP call on the event loop — unlike the summary commands.
    """
    await ctx.send(check_english(config, input_text))
127 |
128 |
@bot.tree.command(
    name=BotCommands.LIST_COMMANDS.value.name,
    description="Get a list of all available commands",
)
async def list_all_commands(interaction: discord.Interaction) -> None:
    """Slash command: reply with the string form of all known bot commands."""
    await interaction.response.send_message(str(get_bot_commands()))
136 |
137 |
138 | bot.run(config.discord_token)
139 |
--------------------------------------------------------------------------------
/docs/jupyter_notebooks/yt_video_summary.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# YT video summary playground\n",
8 | "Playground for testing YT english video summary generation"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 168,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import os\n",
18 | "from dotenv import load_dotenv\n",
19 | "from langchain_community.document_loaders import YoutubeLoader\n",
20 | "from langchain_openai import ChatOpenAI\n",
21 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
22 | "from langchain.chains.summarize import load_summarize_chain\n",
23 | "\n",
24 | "load_dotenv()\n",
25 | "openai_key = os.getenv(\"OPENAI_API_KEY\")\n",
26 | "\n",
27 | "# url = \"https://youtu.be/ThnVAgHzsLg?si=4s8wBcvXrfDPEiRn\"\n",
28 | "url = \"https://www.youtube.com/watch?v=Hkgz1ysv9Fk\"\n",
29 | "# url = \"https://www.youtube.com/watch?v=f9_BWhCI4Zo\"\n",
30 | "# url = \"https://www.youtube.com/watch?v=8OJC21T2SL4\"\n",
31 | "loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)\n",
32 | "results = loader.load()\n",
33 | "\n",
34 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0, openai_api_key=openai_key)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 169,
40 | "metadata": {},
41 | "outputs": [
42 | {
43 | "data": {
44 | "text/plain": [
45 | "2308"
46 | ]
47 | },
48 | "execution_count": 169,
49 | "metadata": {},
50 | "output_type": "execute_result"
51 | }
52 | ],
53 | "source": [
54 | "for document in results:\n",
55 | " text_content = document.page_content\n",
56 | "\n",
57 | "llm.get_num_tokens(text_content)"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 170,
63 | "metadata": {},
64 | "outputs": [
65 | {
66 | "name": "stdout",
67 | "output_type": "stream",
68 | "text": [
69 | "Now we have 2 documents and the first one has 2169 tokens\n"
70 | ]
71 | }
72 | ],
73 | "source": [
74 | "from langchain import OpenAI\n",
75 | "from langchain.chains.summarize import load_summarize_chain\n",
76 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
77 | "\n",
78 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=500)\n",
79 | "\n",
80 | "docs = text_splitter.create_documents([text_content])\n",
81 | "\n",
82 | "num_docs = len(docs)\n",
83 | "\n",
84 | "num_tokens_first_doc = llm.get_num_tokens(docs[0].page_content)\n",
85 | "\n",
86 | "print (f\"Now we have {num_docs} documents and the first one has {num_tokens_first_doc} tokens\")"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 171,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "from langchain import PromptTemplate\n",
96 | "\n",
97 | "map_prompt = \"\"\"\n",
98 | "Write a concise summary of the following:\n",
99 | "\"{text}\"\n",
100 | "CONCISE SUMMARY:\n",
101 | "\"\"\"\n",
102 | "map_prompt_template = PromptTemplate(template=map_prompt, input_variables=[\"text\"])\n",
103 | "\n",
    "summary_combine_prompt = \"\"\"\n",
105 | "Write detailed and comprehensive summary of the video transcript text.\n",
106 | "The summary should cover the main points and key details of the text.\n",
107 | "Return your response in bullet points.\n",
108 | "```{text}```\n",
109 | "BULLET POINT SUMMARY:\n",
110 | "\"\"\"\n",
111 | "summary_combine_prompt_template = PromptTemplate(template=summary_combine_prompt, input_variables=[\"text\"])"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 172,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "summary_chain = load_summarize_chain(llm=llm,\n",
121 | " chain_type='map_reduce',\n",
122 | " map_prompt=map_prompt_template,\n",
123 | " combine_prompt=summary_combine_prompt_template,\n",
124 | "# verbose=True\n",
125 | " )"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 173,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "output = summary_chain.run(docs)\n"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {},
141 | "outputs": [],
142 | "source": [
143 | "print (output)"
144 | ]
145 | }
146 | ],
147 | "metadata": {
148 | "kernelspec": {
149 | "display_name": "personal_assistant",
150 | "language": "python",
151 | "name": "python3"
152 | },
153 | "language_info": {
154 | "codemirror_mode": {
155 | "name": "ipython",
156 | "version": 3
157 | },
158 | "file_extension": ".py",
159 | "mimetype": "text/x-python",
160 | "name": "python",
161 | "nbconvert_exporter": "python",
162 | "pygments_lexer": "ipython3",
163 | "version": "3.11.7"
164 | }
165 | },
166 | "nbformat": 4,
167 | "nbformat_minor": 2
168 | }
169 |
--------------------------------------------------------------------------------
/docs/jupyter_notebooks/document_rag.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 18,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "ename": "ImportError",
10 | "evalue": "cannot import name 'create_model' from 'langchain_core.runnables.utils' (/Users/janbanot/.pyenv/versions/3.11.7/envs/jupyter/lib/python3.11/site-packages/langchain_core/runnables/utils.py)",
11 | "output_type": "error",
12 | "traceback": [
13 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
15 | "Cell \u001b[0;32mIn[18], line 9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpenAIEmbeddings\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ChatOpenAI\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hub\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moutput_parsers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StrOutputParser\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrunnables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RunnablePassthrough\n",
16 | "File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/jupyter/lib/python3.11/site-packages/langchain/hub.py:10\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mload\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdump\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dumps\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mload\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mload\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m loads\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprompts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BasePromptTemplate\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m TYPE_CHECKING:\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchainhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Client\n",
17 | "File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/jupyter/lib/python3.11/site-packages/langchain_core/prompts/__init__.py:27\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"**Prompt** is the input to the model.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03mPrompt is often constructed\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 25\u001b[0m \n\u001b[1;32m 26\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m \u001b[38;5;66;03m# noqa: E501\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprompts\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 28\u001b[0m BasePromptTemplate,\n\u001b[1;32m 29\u001b[0m aformat_document,\n\u001b[1;32m 30\u001b[0m format_document,\n\u001b[1;32m 31\u001b[0m )\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprompts\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mchat\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 33\u001b[0m AIMessagePromptTemplate,\n\u001b[1;32m 34\u001b[0m BaseChatPromptTemplate,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 39\u001b[0m SystemMessagePromptTemplate,\n\u001b[1;32m 40\u001b[0m )\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprompts\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfew_shot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 42\u001b[0m FewShotChatMessagePromptTemplate,\n\u001b[1;32m 43\u001b[0m FewShotPromptTemplate,\n\u001b[1;32m 44\u001b[0m )\n",
18 | "File \u001b[0;32m~/.pyenv/versions/3.11.7/envs/jupyter/lib/python3.11/site-packages/langchain_core/prompts/base.py:31\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrunnables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RunnableConfig, RunnableSerializable\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrunnables\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ensure_config\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrunnables\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_model\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m TYPE_CHECKING:\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocuments\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Document\n",
19 | "\u001b[0;31mImportError\u001b[0m: cannot import name 'create_model' from 'langchain_core.runnables.utils' (/Users/janbanot/.pyenv/versions/3.11.7/envs/jupyter/lib/python3.11/site-packages/langchain_core/runnables/utils.py)"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "import os\n",
25 | "from dotenv import load_dotenv\n",
26 | "import bs4\n",
27 | "from langchain_community.document_loaders import WebBaseLoader\n",
28 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
29 | "from langchain_chroma import Chroma\n",
30 | "from langchain_openai import OpenAIEmbeddings\n",
31 | "from langchain_openai import ChatOpenAI\n",
32 | "from langchain import hub\n",
33 | "from langchain_core.output_parsers import StrOutputParser\n",
34 | "from langchain_core.runnables import RunnablePassthrough\n",
35 | "\n",
36 | "load_dotenv()\n",
37 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
38 | "\n",
39 | "# Only keep post title, headers, and content from the full HTML.\n",
40 | "bs4_strainer = bs4.SoupStrainer(class_=(\"post-title\", \"post-header\", \"post-content\"))\n",
41 | "loader = WebBaseLoader(\n",
42 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n",
43 | " bs_kwargs={\"parse_only\": bs4_strainer},\n",
44 | ")\n",
45 | "docs = loader.load()\n",
46 | "\n",
47 | "text_splitter = RecursiveCharacterTextSplitter(\n",
48 | " chunk_size=1000, chunk_overlap=200, add_start_index=True\n",
49 | ")\n",
50 | "all_splits = text_splitter.split_documents(docs)\n",
51 | "\n",
52 | "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())\n",
53 | "\n",
54 | "retriever = vectorstore.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 6})\n",
55 | "\n",
56 | "retrieved_docs = retriever.invoke(\"What are the approaches to Task Decomposition?\")\n",
57 | "\n",
58 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
59 | "\n",
60 | "prompt = hub.pull(\"rlm/rag-prompt\")\n",
61 | "\n",
62 | "example_messages = prompt.invoke(\n",
63 | " {\"context\": \"filler context\", \"question\": \"filler question\"}\n",
64 | ").to_messages()\n",
65 | "\n",
66 | "def format_docs(docs):\n",
67 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
68 | "\n",
69 | "\n",
70 | "rag_chain = (\n",
71 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
72 | " | prompt\n",
73 | " | llm\n",
74 | " | StrOutputParser()\n",
75 | ")\n",
76 | "\n",
77 | "for chunk in rag_chain.stream(\"What is Task Decomposition?\"):\n",
78 | " print(chunk, end=\"\", flush=True)"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {},
85 | "outputs": [
86 | {
87 | "data": {
88 | "text/plain": [
89 | "{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n",
90 | " 'start_index': 7056}"
91 | ]
92 | },
93 | "execution_count": 10,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "all_splits[10].metadata"
100 | ]
101 | }
102 | ],
103 | "metadata": {
104 | "kernelspec": {
105 | "display_name": "personal_assistant",
106 | "language": "python",
107 | "name": "python3"
108 | },
109 | "language_info": {
110 | "codemirror_mode": {
111 | "name": "ipython",
112 | "version": 3
113 | },
114 | "file_extension": ".py",
115 | "mimetype": "text/x-python",
116 | "name": "python",
117 | "nbconvert_exporter": "python",
118 | "pygments_lexer": "ipython3",
119 | "version": "3.11.7"
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 2
124 | }
125 |
--------------------------------------------------------------------------------