├── .env.example ├── .gitignore ├── .nvmrc ├── .vscode └── launch.json ├── INSTALL ├── LICENSE ├── README.md ├── SETUP.md ├── alembic.ini ├── alembic ├── README ├── env.py ├── script.py.mako └── versions │ ├── 05e95b22503f_initial_migration.py │ ├── 7a4408168dda_add_events_table.py │ └── 8a6746b2ce16_add_timers_table.py ├── requirements.pip ├── src ├── __init__.py ├── db_models.py ├── infra │ ├── __init__.py │ ├── context.py │ ├── logger.py │ └── utils.py ├── message_handler.py ├── run.py └── services │ ├── message_db.py │ ├── messengers │ ├── __init__.py │ ├── messenger.py │ ├── messenger_factory.py │ ├── tg.py │ └── wa.py │ ├── open_ai │ ├── __init__.py │ └── query_openai.py │ ├── timers.py │ └── token_prediction │ └── token_predictor.py ├── test ├── __init__.py ├── data │ ├── backref_user1.json │ ├── brazil.json │ ├── comprehension │ │ ├── backref_ai1.json │ │ └── backref_ai2.json │ ├── fiction │ │ ├── story.json │ │ └── story2.json │ ├── finance │ │ ├── btc.json │ │ └── twostocks.json.fail │ ├── misc │ │ └── 42.json │ ├── pele1.json │ ├── ukpm.json │ └── weather1.json ├── local-test.py └── test-token-predictor.js └── tools ├── __init__.py ├── delete_chat.py ├── extract.py ├── multi_sender.py ├── stats.py └── user_settings.py /.env.example: -------------------------------------------------------------------------------- 1 | # Mandatory settings 2 | 3 | ## OpenAI API key 4 | OPENAI_API_KEY= 5 | 6 | ## Telegram bot token, acquired from the BotFather 7 | TELEGRAM_BOT_TOKEN= 8 | 9 | ## Telegram bot name, used when @mention-ing it 10 | TELEGRAM_BOT_NAME= 11 | 12 | ## Serper key, used for searching Google via serper.dev 13 | SERPER_API_KEY= 14 | 15 | # Optional settings 16 | AZURE_OPENAI_KEY= 17 | 18 | FACEBOOK_GRAPH_VERSION= 19 | WHATSAPP_BOT_TOKEN= 20 | WHATSAPP_PHONE_NUMBER_ID= 21 | WHATSAPP_PHONE_NUMBER= 22 | 23 | DB_CONNECTION_STRING= 24 | SQS_QUEUE_URL= 25 | 26 | DREAMSTUDIO_API_KEY= 27 | 28 | POSTHOG_API_KEY= 29 | 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | .env.dev 3 | .env.prod 4 | handleMessages.zip 5 | logs/ 6 | __pycache__/ 7 | .vscode/ 8 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | v16.19 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "args": ["test/data/ukpm.json"] 13 | "console": "integratedTerminal", 14 | "justMyCode": true, 15 | "env": { 16 | "PYTHONPATH" : "." 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Building Python 3.11.3 2 | ---------------------- 3 | apt-get install wget build-essential libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev libffi-dev zlib1g-dev liblzma-dev -y 4 | 5 | cd /tmp 6 | 7 | wget https://www.python.org/ftp/python/3.11.3/Python-3.11.3.tgz 8 | tar zxf https://www.python.org/ftp/python/3.11.3/Python-3.11.3.tgz 9 | 10 | cd Python-3.11.3 11 | ./configure --enable-optimizations 12 | 13 | make -j 4 14 | 15 | make altinstall 16 | 17 | update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.9 1 18 | 19 | python3 -m pip install --upgrade pip 20 | 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Yair Lifshitz, Eli Siegel, Igor Shumsky, Danny Sivan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Robot 1-X: your friendly AI assistant 2 | Robot 1-X bot is your friendly AI assistant, available via WhatsApp and Telegram. 3 | 4 | WhatsApp: https://wa.me/16692001022 5 | 6 | Telegram: https://t.me/robot1xbot 7 | 8 | 9 | Support: https://wa.me/16692221028 10 | 11 | # What can Robot 1-X do? 12 | ## WhatsApp and Telegram ## 13 | - Answer questions and have a discussion, using GPT-3.5 or GPT-4 14 | - Write stories, plan trips, and tutor 15 | - Tools: search Google 16 | - Voice: answer recorded messages 17 | - Transcription: forward a voice message to Robot 1-X, and he will send you back the transcription 18 | 19 | ## Telegram only ## 20 | - Attend group chats! 21 | - Transcribes EVERY voice message, so you can read instead of listening 22 | - Responds only when @mention-d as beginning of message 23 | 24 | ## Initial setup 25 | 26 | Follow the instructions in SETUP.md. 27 | 28 | ## Local run 29 | 30 | After executing all instructions in SETUP.md, you can launch Robot 1-X with your private Telegram bot by typing: 31 | 32 | ```env R1X_STAGE=dev-local ./src/run.py``` 33 | 34 | Messages are stored locally. 35 | -------------------------------------------------------------------------------- /SETUP.md: -------------------------------------------------------------------------------- 1 | # Robot 1-X dependencies 2 | 3 | At a minimum, you need to have the following available to launch Robot 1-X locally: 4 | 5 | - Python packages installed 6 | - .env.dev-local settings file 7 | - Telegram bot token 8 | - OpenAI API key 9 | - serper.dev API token 10 | 11 | ## Python packages 12 | 13 | Robot 1-X is tested on Python 3.11. 14 | He may or may not operate on older Python versions. 15 | 16 | To install all dependencies, execute: 17 | 18 | ```pip3 install -r requirements.pip``` 19 | 20 | ## .env.dev-local settings file 21 | 22 | Copy .env.example to .env.dev-local. 23 | 24 | ## Telegram bot 25 | 26 | Testing Robot 1-X requires, at a minimum, a Telegram bot. 27 | You will need to create a bot, then update your bot's token and name under .env.dev-local. 28 | The process takes about 2 minutes. 29 | 30 | Creating a Telegram bot: https://core.telegram.org/bots/tutorial#obtain-your-bot-token. 31 | 32 | After creating the bot, update the *TELEGRAM_BOT_TOKEN* and *TELEGRAM_BOT_NAME* parameters in .env.dev-local. 33 | 34 | ## OpenAI 35 | 36 | Create an account with OpenAI: https://platform.openai.com. 37 | 38 | Then, create an OpenAI API key, here: https://platform.openai.com/account/api-keys 39 | 40 | Update *OPENAI_API_KEY* in .env.dev-local. 41 | 42 | ## serper.dev 43 | 44 | Serper provides you with programmatic access to Google Search, which Robot 1-X relies on. 45 | Go to https://serper.dev, sign up, and get your API token. 46 | 47 | Update *SERPER_API_KEY* in .env.dev-local. 48 | 49 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s 8 | # Uncomment the line below if you want the files to be prepended with date and time 9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file 10 | # for all available tokens 11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s 12 | 13 | # sys.path path, will be prepended to sys.path if present. 14 | # defaults to the current working directory. 15 | prepend_sys_path = . 16 | 17 | # timezone to use when rendering the date within the migration file 18 | # as well as the filename. 19 | # If specified, requires the python-dateutil library that can be 20 | # installed by adding `alembic[tz]` to the pip requirements 21 | # string value is passed to dateutil.tz.gettz() 22 | # leave blank for localtime 23 | # timezone = 24 | 25 | # max length of characters to apply to the 26 | # "slug" field 27 | # truncate_slug_length = 40 28 | 29 | # set to 'true' to run the environment during 30 | # the 'revision' command, regardless of autogenerate 31 | # revision_environment = false 32 | 33 | # set to 'true' to allow .pyc and .pyo files without 34 | # a source .py file to be detected as revisions in the 35 | # versions/ directory 36 | # sourceless = false 37 | 38 | # version location specification; This defaults 39 | # to alembic/versions. When using multiple version 40 | # directories, initial revisions must be specified with --version-path. 41 | # The path separator used here should be the separator specified by "version_path_separator" below. 42 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions 43 | 44 | # version path separator; As mentioned above, this is the character used to split 45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 47 | # Valid values for version_path_separator are: 48 | # 49 | # version_path_separator = : 50 | # version_path_separator = ; 51 | # version_path_separator = space 52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 53 | 54 | # set to 'true' to search source files recursively 55 | # in each "version_locations" directory 56 | # new in Alembic version 1.10 57 | # recursive_version_locations = false 58 | 59 | # the output encoding used when revision files 60 | # are written from script.py.mako 61 | # output_encoding = utf-8 62 | 63 | sqlalchemy.url = driver://user:pass@localhost/dbname 64 | 65 | 66 | [post_write_hooks] 67 | # post_write_hooks defines scripts or Python functions that are run 68 | # on newly generated revision scripts. See the documentation for further 69 | # detail and examples 70 | 71 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 72 | # hooks = black 73 | # black.type = console_scripts 74 | # black.entrypoint = black 75 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 76 | 77 | # Logging configuration 78 | [loggers] 79 | keys = root,sqlalchemy,alembic 80 | 81 | [handlers] 82 | keys = console 83 | 84 | [formatters] 85 | keys = generic 86 | 87 | [logger_root] 88 | level = WARN 89 | handlers = console 90 | qualname = 91 | 92 | [logger_sqlalchemy] 93 | level = WARN 94 | handlers = 95 | qualname = sqlalchemy.engine 96 | 97 | [logger_alembic] 98 | level = INFO 99 | handlers = 100 | qualname = alembic 101 | 102 | [handler_console] 103 | class = StreamHandler 104 | args = (sys.stderr,) 105 | level = NOTSET 106 | formatter = generic 107 | 108 | [formatter_generic] 109 | format = %(levelname)-5.5s [%(name)s] %(message)s 110 | datefmt = %H:%M:%S 111 | -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | import os 4 | import dotenv 5 | 6 | from sqlalchemy import engine_from_config 7 | from sqlalchemy import pool 8 | 9 | from alembic import context 10 | 11 | 12 | # this is the Alembic Config object, which provides 13 | # access to the values within the .ini file in use. 14 | config = context.config 15 | 16 | STAGE = os.environ.get('R1X_STAGE', 'dev') 17 | dotenv.load_dotenv(f'.env.{STAGE}') 18 | 19 | config.set_main_option( 20 | "sqlalchemy.url", os.environ["DB_CONNECTION_STRING"] 21 | ) 22 | 23 | # Interpret the config file for Python logging. 24 | # This line sets up loggers basically. 25 | if config.config_file_name is not None: 26 | fileConfig(config.config_file_name) 27 | 28 | # add your model's MetaData object here 29 | # for 'autogenerate' support 30 | # from myapp import mymodel 31 | # target_metadata = mymodel.Base.metadata 32 | from sqlalchemy import MetaData 33 | import src.db_models 34 | target_metadata = src.db_models.Base.metadata 35 | #target_metadata = None 36 | 37 | # other values from the config, defined by the needs of env.py, 38 | # can be acquired: 39 | # my_important_option = config.get_main_option("my_important_option") 40 | # ... etc. 41 | 42 | 43 | def run_migrations_offline() -> None: 44 | """Run migrations in 'offline' mode. 45 | 46 | This configures the context with just a URL 47 | and not an Engine, though an Engine is acceptable 48 | here as well. By skipping the Engine creation 49 | we don't even need a DBAPI to be available. 50 | 51 | Calls to context.execute() here emit the given string to the 52 | script output. 53 | 54 | """ 55 | url = config.get_main_option("sqlalchemy.url") 56 | context.configure( 57 | url=url, 58 | target_metadata=target_metadata, 59 | literal_binds=True, 60 | dialect_opts={"paramstyle": "named"}, 61 | ) 62 | 63 | with context.begin_transaction(): 64 | context.run_migrations() 65 | 66 | 67 | def run_migrations_online() -> None: 68 | """Run migrations in 'online' mode. 69 | 70 | In this scenario we need to create an Engine 71 | and associate a connection with the context. 72 | 73 | """ 74 | connectable = engine_from_config( 75 | config.get_section(config.config_ini_section, {}), 76 | prefix="sqlalchemy.", 77 | poolclass=pool.NullPool, 78 | ) 79 | 80 | with connectable.connect() as connection: 81 | context.configure( 82 | connection=connection, target_metadata=target_metadata 83 | ) 84 | 85 | with context.begin_transaction(): 86 | context.run_migrations() 87 | 88 | 89 | if context.is_offline_mode(): 90 | run_migrations_offline() 91 | else: 92 | run_migrations_online() 93 | -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade() -> None: 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade() -> None: 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /alembic/versions/05e95b22503f_initial_migration.py: -------------------------------------------------------------------------------- 1 | """Initial migration. 2 | 3 | Revision ID: 05e95b22503f 4 | Revises: 5 | Create Date: 2023-05-10 01:55:00.147864 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '05e95b22503f' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.drop_index('user_settings_created_at', table_name='user_settings') 22 | op.drop_index('user_settings_user_id', table_name='user_settings') 23 | op.create_index(op.f('ix_user_settings_createdAt'), 'user_settings', ['createdAt'], unique=False) 24 | op.create_index(op.f('ix_user_settings_user_id'), 'user_settings', ['user_id'], unique=False) 25 | # ### end Alembic commands ### 26 | 27 | 28 | def downgrade() -> None: 29 | # ### commands auto generated by Alembic - please adjust! ### 30 | op.drop_index(op.f('ix_user_settings_user_id'), table_name='user_settings') 31 | op.drop_index(op.f('ix_user_settings_createdAt'), table_name='user_settings') 32 | op.create_index('user_settings_user_id', 'user_settings', ['user_id'], unique=False) 33 | op.create_index('user_settings_created_at', 'user_settings', ['createdAt'], unique=False) 34 | # ### end Alembic commands ### 35 | -------------------------------------------------------------------------------- /alembic/versions/7a4408168dda_add_events_table.py: -------------------------------------------------------------------------------- 1 | """Add events table. 2 | 3 | Revision ID: 7a4408168dda 4 | Revises: 05e95b22503f 5 | Create Date: 2023-05-14 23:03:50.906104 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects.postgresql import JSONB 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '7a4408168dda' 14 | down_revision = '05e95b22503f' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | def upgrade(): 19 | op.create_table( 20 | 'events', 21 | sa.Column('id', sa.Integer, primary_key=True), 22 | sa.Column('type', sa.String), 23 | sa.Column('ref_table', sa.String), 24 | sa.Column('ref_id', sa.Integer), 25 | sa.Column('body', JSONB), 26 | sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('NOW()')), 27 | ) 28 | op.create_index('ix_events_type', 'events', ['type']) 29 | op.create_index('ix_events_ref', 'events', ['ref_table', 'ref_id']) 30 | 31 | def downgrade(): 32 | op.drop_index('ix_events_ref', table_name='events') 33 | op.drop_index('ix_events_type', table_name='events') 34 | op.drop_table('events') 35 | 36 | -------------------------------------------------------------------------------- /alembic/versions/8a6746b2ce16_add_timers_table.py: -------------------------------------------------------------------------------- 1 | """add timers table 2 | 3 | Revision ID: 8a6746b2ce16 4 | Revises: 7a4408168dda 5 | Create Date: 2023-05-20 01:05:43.449156 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | from sqlalchemy.dialects.postgresql import JSONB 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '8a6746b2ce16' 14 | down_revision = '7a4408168dda' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | def upgrade(): 19 | op.create_table( 20 | 'timers', 21 | sa.Column('id', sa.Integer, primary_key=True), 22 | sa.Column('chat_id', sa.String, index=True), 23 | sa.Column('trigger_timestamp', sa.DateTime, index=True), 24 | sa.Column('data', JSONB), 25 | sa.Column('created_at', sa.DateTime), 26 | sa.Column('updated_at', sa.DateTime) 27 | ) 28 | 29 | def downgrade(): 30 | op.drop_table('timers') 31 | 32 | -------------------------------------------------------------------------------- /requirements.pip: -------------------------------------------------------------------------------- 1 | python-box==7.0.1 2 | requests==2.29.0 3 | tiktoken==0.3.3 4 | SQLAlchemy==2.0.15 5 | sqlalchemy-stubs==0.4 6 | python-telegram-bot==20.3 7 | backoff==2.2.1 8 | openai==0.27.6 9 | python-dotenv==1.0.0 10 | psycopg2==2.9.6 11 | posthog==3.0.1 12 | pydub==0.25.1 13 | pydub-stubs==0.25.1.0 14 | boto3==1.26.125 15 | numpy==1.24.3 16 | langchain==0.0.189 17 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/__init__.py -------------------------------------------------------------------------------- /src/db_models.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | 4 | import sqlalchemy 5 | from sqlalchemy import create_engine, func 6 | from sqlalchemy import Boolean, Column, DateTime, Index, Integer, JSON, String, Text, text, TypeDecorator 7 | from sqlalchemy.dialects.postgresql import JSONB 8 | from sqlalchemy.ext.declarative import declarative_base 9 | from sqlalchemy.orm import sessionmaker 10 | from sqlalchemy.engine.url import URL 11 | 12 | # JSONB is not supported by SQLite, but is supported by PostgreSQL. 13 | # DialectAdapter selects the right one is used per database type. 14 | class DialectAdapter(TypeDecorator): 15 | impl = JSON 16 | 17 | def load_dialect_impl(self, dialect): 18 | if dialect.name == 'postgresql': 19 | return dialect.type_descriptor(JSONB()) 20 | else: 21 | return dialect.type_descriptor(JSON()) 22 | 23 | ### Start of table definitions ### 24 | 25 | Base = declarative_base() 26 | metadata = Base.metadata 27 | 28 | class Message(Base): 29 | __tablename__ = 'Messages' 30 | __table_args__ = ( 31 | Index('index_on_messages_chat_id_message_id', 'chatId', 'messageId', unique=True), 32 | Index('index_on_messages_created_at_chat_id', 'createdAt', 'chatId') 33 | ) 34 | 35 | id = Column(Integer, primary_key=True) 36 | source = Column(String(255)) 37 | messageTimestamp = Column(DateTime(True)) 38 | chatType = Column(String(255)) 39 | chatId = Column(String(255)) 40 | senderId = Column(String(255)) 41 | isSentByMe = Column(Boolean) 42 | messageId = Column(String(255)) 43 | replyToMessageId = Column(String(255)) 44 | kind = Column(String(255)) 45 | body = Column(Text) 46 | rawSource = Column(JSON) 47 | createdAt = Column(DateTime(True), nullable=False) 48 | updatedAt = Column(DateTime(True), nullable=False) 49 | 50 | 51 | class SequelizeMeta(Base): 52 | __tablename__ = 'SequelizeMeta' 53 | 54 | name = Column(String(255), primary_key=True) 55 | 56 | 57 | class UserSettings(Base): 58 | __tablename__ = 'user_settings' 59 | 60 | id = Column(Integer, primary_key=True) 61 | user_id = Column(String(255), nullable=False, index=True) 62 | settings = Column(DialectAdapter, nullable=False) 63 | version = Column(Integer, nullable=False) 64 | createdAt = Column(DateTime(True), nullable=False, index=True) 65 | updatedAt = Column(DateTime(True), nullable=False) 66 | 67 | class Event(Base): 68 | __tablename__ = 'events' 69 | 70 | id = Column(Integer, primary_key=True) 71 | type = Column(String) 72 | ref_table = Column(String) 73 | ref_id = Column(Integer) 74 | body = Column(DialectAdapter) 75 | created_at = Column(DateTime(timezone=True), default=func.now(), nullable=False) 76 | 77 | __table_args__ = ( 78 | sqlalchemy.Index('ix_events_type', 'type'), 79 | sqlalchemy.Index('ix_events_ref', 'ref_table', 'ref_id'), 80 | ) 81 | 82 | class Timer(Base): 83 | __tablename__ = 'timers' 84 | 85 | id = Column(Integer, primary_key=True) 86 | chat_id = Column(String, index=True) 87 | trigger_timestamp = Column(DateTime, index=True) 88 | data = Column(DialectAdapter) 89 | created_at = Column(DateTime) 90 | updated_at = Column(DateTime) 91 | 92 | ### End of table definitions ### 93 | 94 | # Set up the database connection 95 | engine = create_engine(os.environ['DB_CONNECTION_STRING']) 96 | 97 | # Create a session factory 98 | Session = sessionmaker(bind=engine) 99 | 100 | # Register models 101 | Base.metadata.create_all(engine) 102 | -------------------------------------------------------------------------------- /src/infra/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/infra/__init__.py -------------------------------------------------------------------------------- /src/infra/context.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from typing import Any, Dict, Union 3 | from infra import logger 4 | 5 | 6 | class ThreadSafeCounter: 7 | def __init__(self): 8 | self._counter = 0 9 | self._lock = threading.Lock() 10 | 11 | def get_and_increment(self): 12 | with self._lock: 13 | val = self._counter 14 | self._counter += 1 15 | return val 16 | 17 | # Usage 18 | counter = ThreadSafeCounter() 19 | 20 | class Context(object): 21 | def __init__(self): 22 | self.user_channel = None # type: str 23 | self.user_settings = {} # type: Dict[str, Any] 24 | 25 | self.msg_count = counter.get_and_increment() 26 | self.logger = logger.create_logging_context(self.msg_count) 27 | 28 | self.stats = {} 29 | 30 | def log(self, message:Any, *args:Any) -> None: 31 | self.logger.log(message, args) 32 | 33 | def set_stat(self, key: str, value: Union[int, bool, float, str]): 34 | self.stats[key] = value 35 | -------------------------------------------------------------------------------- /src/infra/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from logging.handlers import TimedRotatingFileHandler 4 | 5 | # This code was migrated from node.js to Python using ChatGPT. 6 | # Rotation is not necessarily working well. 7 | max_file_size = os.environ.get("MAX_LOG_FILE_SIZE", 100 * 1024 * 1024) 8 | max_log_files = int(os.environ.get("MAX_LOG_FILES", 50)) 9 | 10 | log_formatter = logging.Formatter('%(asctime)s.%(msecs)03d %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 11 | 12 | file_handler = TimedRotatingFileHandler('./logs/r1x.log', when='D', interval=1, backupCount=max_log_files) 13 | file_handler.setFormatter(log_formatter) 14 | file_handler.setLevel(logging.INFO) 15 | file_handler.suffix = '%Y-%m-%d' 16 | file_handler.extMatch = file_handler.extMatch 17 | 18 | console_handler = logging.StreamHandler() 19 | console_handler.setFormatter(log_formatter) 20 | console_handler.setLevel(logging.INFO) 21 | 22 | logger = logging.getLogger() 23 | logger.setLevel(logging.INFO) 24 | logger.addHandler(file_handler) 25 | logger.addHandler(console_handler) 26 | 27 | class log_context(): 28 | def __init__(self, context): 29 | self.context = context; 30 | 31 | def log(self, message, *args): 32 | merged_message = f"[{self.context}] {message} {' '.join(str(arg) for arg in args)}" 33 | logger.info(merged_message) 34 | 35 | def create_logging_context(context): 36 | return log_context(context) 37 | -------------------------------------------------------------------------------- /src/infra/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import sys 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from pydub import AudioSegment 7 | 8 | from infra.context import Context 9 | from infra.logger import logger 10 | 11 | def download_stream_file(ctx:Context, url, path, headers=None): 12 | # Create the directory if it doesn't exist 13 | dir_path = Path(path).parent 14 | os.makedirs(dir_path, exist_ok=True) 15 | 16 | is_successful = False # Variable to track download status 17 | 18 | response = requests.get(url, headers=headers, stream=True) 19 | 20 | with open(path, 'wb') as file: 21 | for chunk in response.iter_content(chunk_size=8192): 22 | file.write(chunk) 23 | 24 | if response.status_code == 200: 25 | ctx.log("downloadFile succeeded") 26 | is_successful = True 27 | 28 | return is_successful 29 | 30 | def convert_audio_to_mp3(ctx:Context, orig_file_path:str, mp3_file_path:str) -> str: 31 | audio = AudioSegment.from_file(orig_file_path) 32 | audio.export(mp3_file_path, format="mp3") 33 | ctx.log("convertAudioToMp3 succeeded") 34 | 35 | return mp3_file_path 36 | 37 | def load_env(): 38 | stage = os.environ.get("R1X_STAGE", "dev") 39 | logger.info(f"Running R1X bot in {stage} mode...") 40 | 41 | load_dotenv(f"./.env.{stage}") 42 | 43 | 44 | # If no database is provided, resort to a locally-hosted SQLite version. 45 | # Typically used for testing. 46 | if os.environ.get('DB_CONNECTION_STRING', '') == '': 47 | os.environ['DB_CONNECTION_STRING'] = 'sqlite:///file::memory:?cache=shared' 48 | 49 | local_dev_required_envs = ['OPENAI_API_KEY', 'TELEGRAM_BOT_TOKEN', 'TELEGRAM_BOT_NAME', 'SERPER_API_KEY'] 50 | all_required_envs = local_dev_required_envs + ['AZURE_OPENAI_KEY', 'FACEBOOK_GRAPH_VERSION', 'WHATSAPP_BOT_TOKEN', 'WHATSAPP_PHONE_NUMBER_ID', 'WHATSAPP_PHONE_NUMBER', 'DB_CONNECTION_STRING', 'SQS_QUEUE_URL', 'DREAMSTUDIO_API_KEY', 'POSTHOG_API_KEY'] 51 | 52 | required_envs = local_dev_required_envs if stage == 'dev-local' else all_required_envs 53 | 54 | # Ensure all reuqired environment variables are set up 55 | for v in required_envs: 56 | if os.environ.get(v, "") == "": 57 | print(f"Environment variable {v} is undefined or an empty string. Pleas configure it via you .env.{stage} file.") 58 | sys.exit(1) 59 | -------------------------------------------------------------------------------- /src/message_handler.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import os 4 | import pathlib 5 | import tempfile 6 | 7 | from posthog import Posthog 8 | from sqlalchemy import desc 9 | 10 | from typing import Any, Dict 11 | 12 | from services.messengers import messenger_factory 13 | from services.messengers.messenger import MessagingService 14 | 15 | from services.open_ai.query_openai import get_chat_completion, get_chat_completion_with_tools, create_transcription 16 | import db_models 17 | from services.message_db import insert_message, get_message_history 18 | import services.messengers as messengers 19 | from infra.context import Context 20 | 21 | posthog_client = None 22 | if os.environ.get('POSTHOG_API_KEY', '') != '': 23 | posthog_client = Posthog( 24 | os.environ['POSTHOG_API_KEY'], 25 | host='https://app.posthog.com' 26 | ) 27 | 28 | def posthog_capture(distinct_id, event, properties): 29 | if posthog_client == None: 30 | return 31 | 32 | posthog_client.capture(distinct_id=distinct_id, event=event, properties=properties) 33 | 34 | def get_user_settings(parsed_message) -> Dict[str, Any]: 35 | user_id = f"{parsed_message.source}:{parsed_message.chatId}" 36 | session = db_models.Session() 37 | 38 | settings = session.query(db_models.UserSettings) \ 39 | .filter(db_models.UserSettings.user_id == user_id) \ 40 | .order_by(desc(db_models.UserSettings.createdAt)) \ 41 | .limit(1) \ 42 | .one_or_none() 43 | 44 | session.close() 45 | 46 | return getattr(settings, 'settings', {}) 47 | 48 | 49 | def handle_incoming_message(ctx: Context, event): 50 | in_flight = {"working": True} 51 | 52 | try: 53 | handle_incoming_message_core(ctx, event, in_flight) 54 | except Exception as error: 55 | ctx.log("Message processing failed: ",error) 56 | raise Exception("Message processing failed.") 57 | finally: 58 | in_flight["working"] = False 59 | 60 | 61 | def handle_incoming_message_core(ctx:Context, event, in_flight): 62 | start = time.time() 63 | parsed_event = json.loads(event) 64 | ctx.log(parsed_event) 65 | messenger = messenger_factory.make_messenger_from_event(parsed_event) 66 | 67 | if messenger is None: 68 | return 69 | 70 | parse_message_result = messenger.parse_message(parsed_event["event"]) 71 | parsed_message, file_info = parse_message_result 72 | 73 | messenger.set_status_read(ctx, parsed_message.messageId) 74 | 75 | ctx.user_settings = get_user_settings(parsed_message) 76 | ctx.user_channel = ctx.user_settings.get('channel', 'stable') 77 | 78 | if not ctx.user_settings.get('enabled', False): 79 | messenger.send_message(ctx, { 80 | "chat_id": parsed_message["chatId"], 81 | "kind": "text", 82 | "body": "Robot 1-X is no longer accessible for free. If you require access, please send a WhatsApp message to +16692221028.\n\nIf you simply require ChatGPT on your smartphone, you can use https://play.google.com/store/apps/details?id=com.openai.chatgpt (Android) or https://apps.apple.com/us/app/chatgpt/id6448311069 (iPhone)." 83 | }) 84 | return 85 | 86 | is_typing = False 87 | 88 | if parsed_message.kind == "voice": 89 | is_typing = True 90 | handle_audio_message(ctx, messenger, parsed_message, file_info, in_flight) 91 | 92 | if parsed_message.isForwarded: 93 | return 94 | 95 | message = insert_message(ctx, parsed_message) 96 | 97 | if message.isSentByMe or message.body is None: 98 | return 99 | 100 | if not messenger.is_message_for_me(message): 101 | return 102 | 103 | if not is_typing: 104 | messenger.set_typing(in_flight) 105 | is_typing = True 106 | 107 | message_history = get_message_history(ctx, message) 108 | ctx.log("message history pulled.") 109 | 110 | if len(message_history) <= 1: 111 | ctx.log("sending intro message.") 112 | send_intro_message(ctx, messenger, parsed_message) 113 | return 114 | 115 | ctx.log("calling get_chat_completion...") 116 | messenger_name = "WhatsApp" if parsed_event["source"] == "wa" else "Telegram" 117 | completion = get_chat_completion_with_tools(ctx, messenger_name, message_history, direct=False) 118 | 119 | ctx.log({"completion": completion}) 120 | ctx.log("get_chat_completion done, result is ", completion.response) 121 | 122 | send_and_store(ctx, messenger, { 123 | 'chat_id': parsed_message.chatId, 124 | 'kind': "text", 125 | 'body': completion.response 126 | }) 127 | 128 | response_time_ms = int((time.time() - parsed_message.messageTimestamp) * 1000) 129 | processing_time_ms = int((time.time() - start) * 1000) 130 | completion_tokens_per_sec = completion.completionTokens / (processing_time_ms / 1000) 131 | 132 | ctx.set_stat('channel', ctx.user_channel) 133 | ctx.set_stat('prompt_tokens', completion.promptTokens) 134 | ctx.set_stat('completion_tokens', completion.completionTokens) 135 | ctx.set_stat('completion_tokens_per_sec', completion_tokens_per_sec) 136 | ctx.set_stat('total_tokens', completion.promptTokens + completion.completionTokens) 137 | ctx.set_stat('response_time_ms', response_time_ms) 138 | ctx.set_stat('processing_time_ms', processing_time_ms) 139 | 140 | ph_props = { 141 | 'senderId': parsed_message.senderId, 142 | } 143 | 144 | ph_props.update(ctx.stats) 145 | 146 | posthog_capture( 147 | distinct_id = f'{parsed_message.source}:{parsed_message.chatId}', 148 | event = 'reply-sent', 149 | properties = ph_props 150 | ) 151 | 152 | def handle_audio_message(ctx, messenger, parsed_message, file_info, in_flight): 153 | messenger.set_typing(in_flight) 154 | 155 | transcript = get_transcript(ctx, messenger, parsed_message, file_info) 156 | text = "\N{SPEAKING HEAD IN SILHOUETTE}\N{MEMO}: " + transcript 157 | 158 | send_attrs = { 159 | "chat_id": parsed_message.chatId, 160 | "kind": "text", 161 | "body": text, 162 | "quote_id": parsed_message.messageId 163 | } 164 | 165 | # Designed behavior: 166 | # 167 | # Forwarded messages: transcribe and exit 168 | # Original messages: transcribe and respond 169 | 170 | if parsed_message.isForwarded: 171 | parsed_message.body = "Please transcribe: " 172 | insert_message(ctx, parsed_message) 173 | send_and_store(ctx, messenger, send_attrs) 174 | else: 175 | parsed_message.body = transcript 176 | # Use messenger.send_message directly, so transcribed reply is not stored in DB 177 | messenger.send_message(ctx, send_attrs) 178 | 179 | posthog_capture( 180 | distinct_id = f"{parsed_message.source}:{parsed_message.chatId}", 181 | event = "message-transcribed", 182 | properties = { 183 | 'sender_id': parsed_message.senderId, 184 | 'channel': ctx.user_channel, 185 | 'length_in_seconds': -1 186 | } 187 | ) 188 | 189 | 190 | 191 | def send_intro_message(ctx:Context, messenger, parsed_message): 192 | intro_message_legal = """Robot 1-X at your service! 193 | 194 | First, be aware that while I always do my best to help, I am not a professional doctor, psychologist, banker or otherwise. 195 | Some of my replies may provide incorrect information about people, locations and events. 196 | Always check my suggestions with a professional. 197 | 198 | 199 | If you're under 18, you must have your parents' permission before you continue talking to me! 200 | 201 | Chatting with me means you agree to my Terms of Use (https://r1x.ai/terms-of-use) and Privacy policy (https://r1x.ai/privacy). 202 | Make sure to read them before continuing this chat.""" 203 | 204 | intro_message_overview = """Here are some things you can ask me for: 205 | 206 | - Write a bedtime story about Abigail and Jonathan, two superheroes who live next to a river. 207 | - Plan a 14-day road trip from Milan to Minsk. Include detailed suggestions about where to spend each day. 208 | - Rewrite the following text with spell-checking and punctuation: pleez send me all the docooments that is need for tomorrow flight im waiting for dem. 209 | - Please summarize the following text: . 210 | 211 | And, you can record a message instead of typing! 212 | 213 | How can I help?""" 214 | 215 | send_and_store(ctx, messenger, { 216 | "chat_id": parsed_message["chatId"], 217 | "kind": "text", 218 | "body": intro_message_legal 219 | }) 220 | 221 | send_and_store(ctx, messenger, { 222 | "chat_id": parsed_message["chatId"], 223 | "kind": "text", 224 | "body": intro_message_overview 225 | }) 226 | 227 | def get_transcript(ctx:Context, messenger, parsed_message, file_info): 228 | mp3_file_path = None 229 | 230 | audio_root = pathlib.Path(tempfile.gettempdir()) / 'r1x' / 'audio' 231 | audio_root.mkdir(exist_ok=True) 232 | 233 | with tempfile.TemporaryDirectory(dir=audio_root, ignore_cleanup_errors=True) as workdir: 234 | mp3_file_path = messenger.get_voice_mp3_file(ctx, parsed_message, file_info, pathlib.Path(workdir)) 235 | transcription = create_transcription(ctx, mp3_file_path) 236 | 237 | return transcription 238 | 239 | def send_and_store(ctx: Context, messenger: MessagingService, message_attributes): 240 | response = messenger.send_message(ctx, message_attributes) 241 | 242 | if response: 243 | insert_message(ctx, response) 244 | -------------------------------------------------------------------------------- /src/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import json 4 | import os 5 | 6 | import boto3 7 | from services.timers import alert_users 8 | 9 | from infra import logger 10 | from infra.context import Context 11 | 12 | import message_handler 13 | 14 | import threading 15 | import traceback 16 | 17 | from telegram import ForceReply, Update 18 | from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters 19 | 20 | NUM_CONSUMERS = 10 21 | 22 | QUEUE_URL = os.environ["SQS_QUEUE_URL"] 23 | 24 | def process_message(message): 25 | ctx = Context() 26 | result = message_handler.handle_incoming_message(ctx, message['Body']) 27 | ctx.log("Finished handling message") 28 | 29 | def single_sqs_handler(queue): 30 | while True: 31 | try: 32 | single_sqs_handler_core(queue) 33 | except Exception as e: 34 | logger.logger.error(f'Exception occurred; {e}; stack trace: ', traceback.format_exc()) 35 | 36 | def single_sqs_handler_core(queue): 37 | response = queue.receive_message(QueueUrl=QUEUE_URL, MaxNumberOfMessages=1, WaitTimeSeconds=20) 38 | 39 | if not 'Messages' in response: 40 | return 41 | 42 | # Single message each time 43 | message = response['Messages'][0] 44 | 45 | process_message(message) 46 | 47 | queue.delete_message(QueueUrl=QUEUE_URL, ReceiptHandle=message['ReceiptHandle']) 48 | 49 | def launch_sqs_threads(): 50 | logger.logger.info(f'Listening on {NUM_CONSUMERS} queues...') 51 | 52 | threads = [] 53 | 54 | for i in range(NUM_CONSUMERS): 55 | queue = boto3.client('sqs', region_name='eu-central-1') 56 | thread = threading.Thread(target=single_sqs_handler, args=(queue,)) 57 | thread.start() 58 | threads.append(thread) 59 | 60 | return threads 61 | 62 | async def handle_local_incoming_telegram_message(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: 63 | message = { 'Body' : json.dumps({ 'source' : 'tg', 'event' : json.loads(update.to_json()) }) } 64 | ctx = Context() 65 | 66 | process_message(message) 67 | 68 | def launch_local_telegram_listener(): 69 | # Create the Application and pass it your bot's token. 70 | application = Application.builder().token(os.environ['TELEGRAM_BOT_TOKEN']).build() 71 | 72 | # on non command i.e message - echo the message on Telegram 73 | tg_filters = (filters.AUDIO | filters.TEXT | filters.VOICE) & ~filters.COMMAND 74 | application.add_handler(MessageHandler(tg_filters, handle_local_incoming_telegram_message)) 75 | 76 | # Run the bot until the user presses Ctrl-C 77 | application.run_polling() 78 | 79 | # Threads to wait on; never reached 80 | return [] 81 | 82 | def main(): 83 | threads = [] 84 | 85 | timer_thread = threading.Thread(target=alert_users) 86 | timer_thread.start() 87 | threads.append(timer_thread) 88 | 89 | if os.environ['R1X_STAGE'] in ['dev', 'prod']: 90 | threads = launch_sqs_threads() 91 | else: 92 | threads = launch_local_telegram_listener() 93 | 94 | for thread in threads: 95 | thread.join() 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /src/services/message_db.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import and_, desc 2 | import db_models 3 | import datetime 4 | 5 | from infra.context import Context 6 | 7 | def insert_message(ctx:Context, attributes): 8 | source = attributes['source'] 9 | message_timestamp = datetime.datetime.fromtimestamp(attributes['messageTimestamp'], tz=datetime.timezone.utc) 10 | chat_type = attributes['chatType'] 11 | chat_id = attributes['chatId'] 12 | sender_id = attributes['senderId'] 13 | is_sent_by_me = attributes['isSentByMe'] 14 | message_id = attributes['messageId'] 15 | reply_to_message_id = attributes['replyToMessageId'] 16 | kind = attributes['kind'] 17 | body = attributes['body'] 18 | raw_source = attributes['rawSource'] 19 | 20 | ctx.log('insertMessage attributes:', attributes) 21 | 22 | with db_models.Session() as session: 23 | existing_message = session.query(db_models.Message).filter(and_(db_models.Message.chatId == chat_id, db_models.Message.messageId == message_id)).one_or_none() 24 | 25 | if existing_message: 26 | return existing_message 27 | 28 | now = datetime.datetime.now() 29 | 30 | message = db_models.Message( 31 | source=source, 32 | messageTimestamp=message_timestamp, 33 | chatType=chat_type, 34 | chatId=chat_id, 35 | senderId=sender_id, 36 | isSentByMe=is_sent_by_me, 37 | messageId=message_id, 38 | replyToMessageId=reply_to_message_id, 39 | kind=kind, 40 | body=body, 41 | rawSource=raw_source, 42 | createdAt=now, 43 | updatedAt=now 44 | ) 45 | 46 | session.add(message) 47 | session.commit() 48 | session.refresh(message) 49 | 50 | session.close() 51 | 52 | return message 53 | 54 | def get_message_history(ctx:Context, message, options=None): 55 | if options is None: 56 | options = {} 57 | 58 | limit = options.get('limit', 20) 59 | chat_id = message.chatId 60 | message_timestamp = message.messageTimestamp 61 | 62 | with db_models.Session() as session: 63 | messages = session.query(db_models.Message) \ 64 | .filter(and_(db_models.Message.chatId == chat_id, db_models.Message.messageTimestamp <= message_timestamp)) \ 65 | .order_by(desc(db_models.Message.createdAt)).limit(limit).all() 66 | 67 | session.close() 68 | 69 | return list(reversed(messages)) 70 | -------------------------------------------------------------------------------- /src/services/messengers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/services/messengers/__init__.py -------------------------------------------------------------------------------- /src/services/messengers/messenger.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Tuple 3 | 4 | from box import Box 5 | 6 | from infra.context import Context 7 | 8 | 9 | class MessageKindE: 10 | TEXT = 'text' 11 | VOICE = 'voice' 12 | AUDIO = 'audio' 13 | 14 | 15 | class MessagingService(ABC): 16 | def __init__(self, chat_id: str): 17 | super().__init__() 18 | self.chat_id = chat_id 19 | 20 | @abstractmethod 21 | def parse_message(self, message) -> Tuple[Box, Box]: 22 | pass 23 | 24 | @abstractmethod 25 | def send_message(self, ctx:Context, attributes) -> Box: 26 | pass 27 | 28 | @abstractmethod 29 | def send_contact(self, ctx:Context, name:str, handle:str): 30 | pass 31 | 32 | @abstractmethod 33 | def is_message_for_me(self, message) -> bool: 34 | pass 35 | 36 | @abstractmethod 37 | def set_typing(self, in_flight) ->None: 38 | pass 39 | 40 | @abstractmethod 41 | def get_voice_mp3_file(self, ctx:Context, parsed_message, file_info, work_dir) -> str: 42 | pass 43 | 44 | @abstractmethod 45 | def set_status_read(self, ctx:Context, message_id) -> None: 46 | pass -------------------------------------------------------------------------------- /src/services/messengers/messenger_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple, Optional, Type, Callable 2 | from services.messengers.messenger import MessagingService 3 | from services.messengers.tg import TelegramMessenger 4 | from services.messengers.wa import WhatsappMessenger 5 | 6 | messenger_by_type: Dict[str, Type[MessagingService]] = {'tg': TelegramMessenger, 'wa': WhatsappMessenger} 7 | 8 | 9 | def make_messenger(messenger_chat_id: str) -> MessagingService: 10 | messenger_str, chat_id = messenger_chat_id.split(":") 11 | messenger = messenger_by_type[messenger_str](chat_id) 12 | return messenger 13 | 14 | 15 | def _make_wa_messenger_from_event(event: Dict) -> Optional[MessagingService]: 16 | entry_changes0 = event['event']['entry'][0]['changes'][0]['value'] 17 | if 'messages' not in entry_changes0: 18 | # not a message event. 19 | return None 20 | 21 | chat_id = entry_changes0['messages'][0]['from'] 22 | messenger = messenger_by_type[event['source']](chat_id) 23 | return messenger 24 | 25 | 26 | def _make_tg_messenger_from_event(event: Dict) -> MessagingService: 27 | chat_id = str(event['event']['message']['chat']['id']) 28 | messenger = messenger_by_type[event['source']](chat_id) 29 | return messenger 30 | 31 | 32 | messenger_factory_by_type: Dict[str, Callable] = {'tg': _make_tg_messenger_from_event, 'wa': _make_wa_messenger_from_event} 33 | 34 | 35 | def make_messenger_from_event(event: Dict) -> Optional[MessagingService]: 36 | messenger = messenger_factory_by_type[event['source']](event) 37 | return messenger 38 | -------------------------------------------------------------------------------- /src/services/messengers/tg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import tempfile 4 | from typing import Optional 5 | import requests 6 | 7 | from infra.context import Context 8 | from services.messengers.messenger import MessageKindE, MessagingService 9 | from infra import utils 10 | from box import Box 11 | 12 | import threading 13 | 14 | TELEGRAM_SENDER_ID = os.environ['TELEGRAM_BOT_TOKEN'].split(':')[0] 15 | 16 | class TelegramMessenger(MessagingService): 17 | 18 | def _get_message_kind(self, message) -> Optional[str]: 19 | if 'text' in message: 20 | return MessageKindE.TEXT 21 | elif 'voice' in message: 22 | return MessageKindE.VOICE 23 | elif 'audio' in message: 24 | return MessageKindE.AUDIO 25 | return None 26 | 27 | def parse_message(self, message): 28 | message = message['message'] 29 | 30 | source = "tg" 31 | message_timestamp = message['date'] 32 | chat_type = message['chat']['type'] 33 | 34 | sender_id = str(message['from']['id']) 35 | is_sent_by_me = message['from']['id'] == int(TELEGRAM_SENDER_ID) 36 | is_forwarded = message.get('forward_from', None) != None 37 | messageId = str(message['message_id']) 38 | reply_to_message_id = message['reply_to_message']['message_id'] if 'reply_to_message' in message else None 39 | kind = self._get_message_kind(message) 40 | body = message['text'] if 'text' in message else None 41 | fileId = message['voice']['file_id'] if kind == MessageKindE.VOICE else None 42 | fileUniqueId = message['voice']['file_unique_id'] if kind == MessageKindE.VOICE else None 43 | 44 | return ( 45 | Box({ 46 | 'source': source, 47 | 'messageTimestamp': message_timestamp, 48 | 'chatType': chat_type, 49 | 'chatId': self.chat_id, 50 | 'senderId': sender_id, 51 | 'isSentByMe': is_sent_by_me, 52 | 'isForwarded': is_forwarded, 53 | 'messageId': messageId, 54 | 'replyToMessageId': reply_to_message_id, 55 | 'kind': kind, 56 | 'body': body, 57 | 'rawSource': message 58 | }), 59 | Box({ 60 | 'fileId': fileId, 61 | 'fileUniqueId': fileUniqueId 62 | }) 63 | ) 64 | 65 | def send_message(self, ctx:Context, attributes): 66 | quote_id = attributes.get('quote_id') 67 | kind = attributes.get('kind') 68 | body = attributes.get('body') 69 | 70 | if kind != "text": 71 | return 72 | 73 | args = {'chat_id': self.chat_id, 'text': body} 74 | if quote_id: 75 | args['reply_to_message_id'] = quote_id 76 | args['allow_sending_without_reply'] = True 77 | 78 | response = requests.post( 79 | f'https://api.telegram.org/bot{os.environ["TELEGRAM_BOT_TOKEN"]}/sendMessage', 80 | json=args 81 | ).json() 82 | 83 | if not response['ok']: 84 | return None 85 | 86 | message = {'message': response['result']} 87 | parsed_message, file_info = self.parse_message(message) 88 | 89 | return parsed_message 90 | 91 | def send_contact(self, ctx: Context, name:str, handle:str): 92 | args = {'chat_id': self.chat_id, 'text': f'https://t.me/{handle}'} 93 | response = requests.post( 94 | f'https://api.telegram.org/bot{os.environ["TELEGRAM_BOT_TOKEN"]}/sendMessage', 95 | json=args 96 | ).json() 97 | 98 | return response 99 | 100 | 101 | def is_message_for_me(self, msg) -> bool: 102 | if msg.chatType == "private": 103 | return True 104 | 105 | if msg.body.startswith(f'@{os.environ["TELEGRAM_BOT_NAME"]}'): 106 | return True 107 | 108 | if 'reply_to_message' in msg.rawSource and msg.rawSource['reply_to_message']['from']['id'] == int(TELEGRAM_SENDER_ID): 109 | return True 110 | 111 | return False 112 | 113 | 114 | def get_voice_mp3_file(self, ctx:Context, parsed_message, file_info, work_dir) -> str: 115 | ctx.log(f"getVoiceMp3File: {parsed_message}, {file_info}, {work_dir}") 116 | url = self._get_download_url(ctx, file_info.fileId) 117 | orig_file_path, mp3_file_path = self._get_audio_file_paths(ctx, parsed_message.chatId, file_info, work_dir) 118 | 119 | utils.download_stream_file(ctx, url, orig_file_path) 120 | utils.convert_audio_to_mp3(ctx, orig_file_path, mp3_file_path) 121 | 122 | return mp3_file_path 123 | 124 | def _get_download_url(self, ctx:Context, file_id): 125 | args = {"file_id": file_id} 126 | 127 | response = requests.post( 128 | f"https://api.telegram.org/bot{os.environ['TELEGRAM_BOT_TOKEN']}/getFile", 129 | json=args, 130 | ) 131 | data = response.json() 132 | 133 | if not data["ok"]: 134 | ctx.log(f"getDownloadUrl failed. response={data}") 135 | 136 | remote_file_path = data["result"]["file_path"] 137 | download_url = f"https://api.telegram.org/file/bot{os.environ['TELEGRAM_BOT_TOKEN']}/{remote_file_path}" 138 | 139 | ctx.log(f"getDownloadUrl: downloadUrl={download_url}") 140 | return download_url 141 | 142 | def _get_audio_file_paths(self, ctx:Context, chat_id, file_info, work_dir): 143 | orig_file_path = work_dir / 'audio.orig' 144 | mp3_file_path = work_dir / 'audio.mp3' 145 | 146 | ctx.log(f"getAudioFilePaths: origFilePath={orig_file_path}, mp3FilePath={mp3_file_path}") 147 | 148 | return orig_file_path, mp3_file_path 149 | 150 | def set_typing(self, in_flight): 151 | if not in_flight["working"]: 152 | return 153 | 154 | requests.post( 155 | f"https://api.telegram.org/bot{os.environ['TELEGRAM_BOT_TOKEN']}/sendChatAction", 156 | json={"chat_id": self.chat_id, "action": "typing"}, 157 | ) 158 | 159 | base_timeout = 6 160 | extra_timeout = random.randint(0, 1500) 161 | timeout = base_timeout + (extra_timeout / 1000) 162 | 163 | timer = threading.Timer(timeout, self.set_typing, args=(in_flight,)) 164 | timer.start() 165 | 166 | def set_status_read(self, ctx: Context, message_id) -> None: 167 | return 168 | -------------------------------------------------------------------------------- /src/services/messengers/wa.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import requests 4 | from services.messengers.messenger import MessageKindE, MessagingService 5 | from infra import utils 6 | from box import Box 7 | import time 8 | 9 | from infra.context import Context 10 | 11 | class EventKindE: 12 | STATUS_UPDATE = 'status_update' 13 | MESSAGE = 'message' 14 | 15 | 16 | class WhatsappMessenger(MessagingService): 17 | def _get_event_kind(self, value): 18 | if 'statuses' in value: 19 | return EventKindE.STATUS_UPDATE 20 | if 'messages' in value: 21 | return EventKindE.MESSAGE 22 | return None 23 | 24 | def _get_message_kind(self, value) -> str: 25 | if value['type'] == 'audio': 26 | return MessageKindE.VOICE 27 | return value['type'] 28 | 29 | def parse_message(self, message): 30 | source = "wa" 31 | event_kind = self._get_event_kind(message['entry'][0]['changes'][0]['value']) 32 | if event_kind != EventKindE.MESSAGE: 33 | return None 34 | 35 | message0 = message['entry'][0]['changes'][0]['value']['messages'][0] 36 | 37 | kind = self._get_message_kind(message0) 38 | message_timestamp = float(message0['timestamp']) 39 | sender_id = message0['from'] 40 | chat_type = "private" 41 | is_sent_by_me = sender_id == os.environ['WHATSAPP_PHONE_NUMBER'] 42 | is_forwarded = (message0.get('context', {}).get('forwarded', None) != None) 43 | message_id = message0['id'] 44 | reply_to_message_id = message0.get('context', {}).get('id') 45 | 46 | if kind == MessageKindE.TEXT: 47 | body = message0['text']['body'] 48 | else: 49 | body = None 50 | 51 | if kind == MessageKindE.VOICE: 52 | file_id = message0['audio']['id'] 53 | else: 54 | file_id = None 55 | 56 | file_unique_id = None 57 | 58 | return [Box({ 59 | "source": source, 60 | "messageTimestamp": message_timestamp, 61 | "chatType": chat_type, 62 | "chatId": self.chat_id, 63 | "senderId": sender_id, 64 | "isSentByMe": is_sent_by_me, 65 | "isForwarded" : is_forwarded, 66 | "messageId": message_id, 67 | "replyToMessageId": reply_to_message_id, 68 | "kind": kind, 69 | "body": body, 70 | "rawSource": message 71 | }), Box({ 72 | "fileId": file_id, 73 | "fileUniqueId": file_unique_id 74 | })] 75 | 76 | def _get_bot_generated_message(self, ctx:Context, send_message_response, attributes): 77 | quote_id = attributes.get('quote_id') 78 | kind = attributes.get('kind') 79 | body = attributes.get('body') 80 | 81 | message = { 82 | "entry": [ 83 | { 84 | "changes": [ 85 | { 86 | "value": { 87 | "messages": [ 88 | { 89 | "timestamp": (int(time.time() * 1000) / 1e3), 90 | "from": os.environ['WHATSAPP_PHONE_NUMBER'], 91 | "id": send_message_response['messages'][0]['id'], 92 | "type": kind, 93 | "text": { 94 | "body": body 95 | } 96 | } 97 | ] 98 | } 99 | } 100 | ] 101 | } 102 | ] 103 | } 104 | 105 | return message 106 | 107 | def send_message(self, ctx:Context, attributes): 108 | quote_id = attributes.get('quote_id') 109 | kind = attributes.get('kind') 110 | body = attributes.get('body') 111 | 112 | if kind != "text": 113 | return 114 | 115 | if len(body) > 4000: 116 | ctx.log('send_message: message body too long, %d > 4000' % len(body)) 117 | body = body[0:3999] 118 | 119 | headers = { 120 | "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}", 121 | "Content-Type": "application/json" 122 | } 123 | 124 | args = { 125 | "messaging_product": "whatsapp", 126 | "recipient_type": "individual", 127 | "to": self.chat_id, 128 | "type": "text", 129 | "text": { 130 | "preview_url": False, 131 | "body": body 132 | } 133 | } 134 | 135 | if quote_id: 136 | args["context"] = {"message_id": quote_id} 137 | 138 | response = self._post_message_request(ctx, headers, args) 139 | 140 | if response == None: 141 | return None 142 | 143 | message = self._get_bot_generated_message(ctx, response.json(), attributes) 144 | parsed_message, _ = self.parse_message(message) 145 | parsed_message.chatId = self.chat_id 146 | 147 | return parsed_message 148 | 149 | def _post_message_request(self, ctx:Context, headers:Dict[str,str], args): 150 | try: 151 | response = requests.post( 152 | f"https://graph.facebook.com/{os.environ['FACEBOOK_GRAPH_VERSION']}/{os.environ['WHATSAPP_PHONE_NUMBER_ID']}/messages", 153 | json=args, 154 | headers=headers 155 | ) 156 | response.raise_for_status() 157 | except requests.exceptions.RequestException as error: 158 | ctx.log(f"post_message_request: exception. error={error}") 159 | raise error 160 | return response 161 | 162 | def send_contact(self, ctx: Context, name:str, handle:str): 163 | headers = { 164 | "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}", 165 | "Content-Type": "application/json" 166 | } 167 | contact_args = { 168 | "messaging_product": "whatsapp", 169 | "recipient_type": "individual", 170 | "to": self.chat_id, 171 | "type": "contacts", 172 | "contacts": [ 173 | { 174 | "addresses": [], 175 | "emails": [], 176 | "name": { 177 | "first_name": name, 178 | "formatted_name": name, 179 | "last_name": "" 180 | }, 181 | "org": {}, 182 | "phones": [ 183 | { 184 | "phone": f"+{handle}", 185 | "type": "HOME", 186 | "wa_id": handle 187 | } 188 | ], 189 | "urls": [] 190 | } 191 | ] 192 | } 193 | response = self._post_message_request(ctx,headers,contact_args) 194 | return response.json() 195 | 196 | def is_message_for_me(self, msg) -> bool: 197 | if msg.chatType == "private": 198 | return True 199 | 200 | return False 201 | 202 | def get_voice_mp3_file(self, ctx:Context, parsed_message, file_info, work_dir) -> str: 203 | ctx.log(f"getVoiceMp3File: {parsed_message}, {file_info}, {work_dir}") 204 | url = self._get_download_url(ctx, file_info.fileId) 205 | orig_file_path, mp3_file_path = self._get_audio_file_paths(ctx, parsed_message.chatId, file_info, work_dir) 206 | 207 | headers = { 208 | "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}", 209 | } 210 | 211 | utils.download_stream_file(ctx, url, orig_file_path, headers) 212 | utils.convert_audio_to_mp3(ctx, orig_file_path, mp3_file_path) 213 | 214 | return mp3_file_path 215 | 216 | def _get_download_url(self, ctx:Context, file_id): 217 | ctx.log(f"getDownloadUrl: {file_id}") 218 | headers = { 219 | "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}", 220 | } 221 | 222 | try: 223 | response = requests.get( 224 | f"https://graph.facebook.com/{os.environ['FACEBOOK_GRAPH_VERSION']}/{file_id}?phone_number_id={os.environ['WHATSAPP_PHONE_NUMBER_ID']}", 225 | headers=headers 226 | ) 227 | response.raise_for_status() 228 | except requests.exceptions.RequestException as error: 229 | ctx.log(f"getDownloadUrl: exception. error={error}") 230 | raise error 231 | 232 | download_url = response.json()['url'] 233 | 234 | ctx.log(f"getDownloadUrl: downloadUrl={download_url}") 235 | return download_url 236 | 237 | def _get_audio_file_paths(self, ctx:Context, chat_id, file_info, work_dir): 238 | orig_file_path = work_dir / 'audio.orig' 239 | mp3_file_path = work_dir / 'audio.mp3' 240 | 241 | ctx.log(f"getAudioFilePaths: orgFilePath={orig_file_path}, mp3FilePath={mp3_file_path}") 242 | 243 | return orig_file_path, mp3_file_path 244 | 245 | 246 | def set_typing(self, in_flight): 247 | # TODO: igors - can't find WA API for typing indication. 248 | pass 249 | 250 | 251 | def set_status_read(self, ctx:Context, message_id): 252 | ctx.log("setStatusRead") 253 | headers = { 254 | "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}", 255 | "Content-Type": "application/json", 256 | } 257 | 258 | args = { 259 | "messaging_product": "whatsapp", 260 | "status": "read", 261 | "message_id": message_id, 262 | } 263 | 264 | try: 265 | response = requests.post( 266 | f"https://graph.facebook.com/{os.environ['FACEBOOK_GRAPH_VERSION']}/{os.environ['WHATSAPP_PHONE_NUMBER_ID']}/messages", 267 | json=args, 268 | headers=headers 269 | ) 270 | response.raise_for_status() 271 | except requests.exceptions.RequestException as error: 272 | ctx.log(f"setStatusRead: exception. error={error}") 273 | return 274 | 275 | if response.json().get('success') != True: 276 | ctx.log(f"setStatusRead: response is wrong. Compared field {response.json().get('success')}. Full response {response}") 277 | -------------------------------------------------------------------------------- /src/services/open_ai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/services/open_ai/__init__.py -------------------------------------------------------------------------------- /src/services/open_ai/query_openai.py: -------------------------------------------------------------------------------- 1 | import backoff 2 | import json 3 | import os 4 | import openai 5 | import time 6 | import re 7 | import requests 8 | import traceback 9 | from typing import Dict 10 | 11 | from box import Box 12 | from services.timers import invoke_alert_tool 13 | 14 | 15 | from services.token_prediction import token_predictor 16 | from infra.context import Context 17 | from langchain.utilities import google_serper 18 | 19 | OPENAI_SPEECH_TO_TEXT_MODEL = 'whisper-1' 20 | 21 | openai.api_key = os.environ['OPENAI_API_KEY'] 22 | 23 | 24 | def deep_clone(o): 25 | return json.loads(json.dumps(o)) 26 | 27 | 28 | def convert_message_to_chat_format(message): 29 | converted_message = { 30 | "role": "assistant" if message.isSentByMe else "user", 31 | "content": message.body, 32 | } 33 | return converted_message 34 | 35 | 36 | def get_system_message(ctx:Context, messenger_name): 37 | current_date = time.strftime("%B %d, %Y", time.gmtime()) 38 | 39 | system_message = { 40 | "role": "system", 41 | "content": f"""You are Robot 1-X (R1X), a helpful, cheerful assistant developed by the Planet Express team and integrated into a {messenger_name} chat. 42 | You are based on GPT-3.5 technology. More information about R1X is available at https://r1x.ai. 43 | Today is {current_date}. 44 | 45 | If Robot 1-X does not know, it truthfully says so. 46 | If user asks for information that Robot 1-X does not have but can estimate, Robot 1-X will provide the estimate, while mentioning it is an estimate and not a fact.""" 47 | } 48 | 49 | return system_message 50 | 51 | 52 | def db_messages2messages(messages): 53 | parsed_messages = [] 54 | 55 | for message in messages: 56 | if message.body is None: 57 | continue 58 | parsed_messages.append(convert_message_to_chat_format(message)) 59 | 60 | return parsed_messages 61 | 62 | 63 | def get_limited_message_history(ctx, messages, prompt_template): 64 | soft_token_limit = 2048 65 | hard_token_limit = 4000 66 | 67 | messages_upto_max_tokens = token_predictor.get_messages_upto_max_tokens( 68 | ctx, prompt_template, messages, soft_token_limit, hard_token_limit 69 | ) 70 | 71 | if len(messages_upto_max_tokens) == 0: 72 | return [] 73 | 74 | if messages_upto_max_tokens[0]["role"] == "assistant": 75 | messages_upto_max_tokens.pop(0) 76 | 77 | merged_messages = [] 78 | prev_role = None 79 | 80 | for message in messages_upto_max_tokens: 81 | if message["role"] == 'assistant': 82 | message["content"] = message["content"].removeprefix("\N{LEFT-POINTING MAGNIFYING GLASS}: ") 83 | 84 | if message["role"] == prev_role: 85 | merged_messages[-1]["content"] += f"\n{message['content']}" 86 | else: 87 | merged_messages.append(message) 88 | 89 | prev_role = message["role"] 90 | 91 | return merged_messages 92 | 93 | 94 | def get_chat_completion(ctx:Context, messenger_name, messages, direct): 95 | parsed_messages = deep_clone(messages) if direct else db_messages2messages(messages) 96 | 97 | system_message = get_system_message(ctx, messenger_name) 98 | messages_upto_max_tokens = get_limited_message_history( 99 | ctx, parsed_messages, system_message 100 | ) 101 | 102 | return get_chat_completion_core(ctx, messenger_name, messages_upto_max_tokens) 103 | 104 | @backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_tries=3) 105 | def get_chat_completion_core(ctx, messenger_name, messages, model=None): 106 | if not model: 107 | model = "gpt-4" if ctx.user_channel == "canary" else "gpt-3.5-turbo" 108 | 109 | try: 110 | ctx.log("Messages: ", messages); 111 | ctx.log("invoking completion request.") 112 | 113 | completion = chat_completion_create_wrap(ctx, model, messages) 114 | 115 | ctx.log("getChatCompletionCore response: ", completion['choices'][0]['message']['content']) 116 | 117 | return Box({ 118 | "response": completion['choices'][0]['message']['content'], 119 | "promptTokens": completion['usage']['prompt_tokens'], 120 | "completionTokens": completion['usage']['completion_tokens'] 121 | }) 122 | except Exception as e: 123 | if hasattr(e, "response"): 124 | ctx.log(f"error: e.response={e.response}") 125 | else: 126 | ctx.log("error: e={e}", e) 127 | 128 | ctx.log("error generating completion from OpenAI.") 129 | raise e 130 | 131 | 132 | def get_prep_message(ctx : Context, messenger, is_final : bool) -> Dict[str, str]: 133 | current_date = time.strftime("%B %d, %Y", time.gmtime()) 134 | 135 | is_debug_prompt = False 136 | 137 | gpt_ver = 'GPT-4' if ctx.user_channel == 'canary' else 'GPT-3.5' 138 | 139 | prep_message_stable = { 140 | "role" : "user", 141 | "content" : f"""You are Robot 1-X (R1X), a helpful, cheerful assistant developed by the Planet Express team and integrated into a {messenger} chat. 142 | You are based on {gpt_ver} technology. More information about you is available at https://r1x.ai. 143 | 144 | I will provide a CHAT between R1X and a human, wrapped with tags: CHAT. Last speaker is the user. 145 | 146 | Your task is to provide R1X's answer. 147 | 148 | You can invoke one of the following tools to augment your knowledge before replying: 149 | 150 | ALERT: sets a reminder for the user. TOOL_INPUT=(seconds, text), where seconds is relative time in seconds from request to when alert should be provided. answer with an error message if the user provides an absolute time. 151 | SEARCH: performs a Google search and returns key results. Use this tool to fetch real-time, up-to-date information about world events. Its data is more reliable than your existing knowledge. TOOL_INPUT=search prompt. 152 | WEATHER: per-location 3-day weather forecast, at day granularity. It does not provide a finer-grained forecast. TOOL_INPUT=, both in English. TOOL_INPUT should always be a well-defined settlement and country/state. IMPORTANT: If you believe the right value for TOOL_INPUT is unknown/my location/similar, do not ask for the tool to be invoked and instead use the ANSWER format to ask the user for location information. 153 | 154 | For invoking a tool, provide your reply wrapped in REPLY tags, where REPLY is in JSON format with the following fields: TOOL, TOOL_INPUT. 155 | Examples: 156 | 157 | {{ "TOOL" : "ALERT", "TOOL_INPUT" : (240, "Do the dishes") }} 158 | {{ "TOOL" : "SEARCH", "TOOL_INPUT" : "Who is the current UK PM?" }} 159 | {{ "TOOL" : "WEATHER", "TOOL_INPUT" : "Tel Aviv, Israel" }} 160 | 161 | Use these exact formats, and do not deviate. 162 | 163 | Otherwise, provide your final reply wrapped in REPLY tags in a JSON format, with the following fields: ANSWER. 164 | Example: 165 | 166 | {{ "ANSWER" : "Current UK PM is Rishi Sunak" }} 167 | 168 | When providing a final answer, use this exact format, and do not deviate. 169 | IMPORTANT: ALWAYS wrap your final answer with tags, and in JSON format. 170 | 171 | Today's date is {current_date}. 172 | For up-to-date information about people, stocks and world events, ALWAYS use one of the tools available to you and DO NOT provide an answer. 173 | For fiction requests, use your knowledge and creativity to answer. 174 | If human request has no context of time, assume he is referring to current time period. 175 | All tools provided have real-time access to the internet; do not reply that you have no access to the internet, unless you have attempted to invoke the SEARCH tool first. Additionally, do not invoke a tool if the required TOOL_INPUT is unknown, vague, or not provided. Always follow the IMPORTANT note in the tool description. 176 | If you have missing data and ONLY if you cannot use the tools provided to fetch it, try to estimate; in these cases, let the user know your answer is an estimate. 177 | 178 | Don't provide your response until you made sure it is valid, and meets all prerequisites laid out for tool invocation. 179 | 180 | WHEN PROVIDING A FINAL ANSWER TO THE USER, NEVER MENTION THE SEARCH AND WEATHER TOOLS DIRECTLY, AND DO NOT SUGGEST THAT THE USER UTILIZES THEM. 181 | 182 | Your thought process should follow the next steps {'audibly stating the CONCLUSION for each step number without quoting it:' if is_debug_prompt else 'silently:'} 183 | 1. Understand the human's request and formulate it as a self-contained question. 184 | 2. Decide which tool should be invoked can provide the most information, and with what input. Decide all prerequisites for the tool and show how each is met. 185 | 3. Formulate the tool invocation request, or answer, in JSON format as detailed above. IMPORTANT: THIS PART MUST BE DELIVERED IN A SINGLE LINE. DO NOT USE MULTILINE SYNTAX. 186 | 187 | IMPORTANT: Make sure to focus on the most recent request from the user, even if it is a repeated one.""" } 188 | 189 | prep_message_final = { 190 | "role" : "user", 191 | "content" : f"""You are Robot 1-X (R1X), a helpful, cheerful assistant developed by the Planet Express team and integrated into a {messenger} chat. 192 | You are based on {gpt_ver} technology. More information about you is available at https://r1x.ai. 193 | 194 | I will provide a CHAT between R1X and a human, wrapped with tags: CHAT. Last speaker is the user. 195 | I will also provide you with data generated by external tool invocations, which you can rely on for your answers; this data will be wrapped with tags, as such: DATA. 196 | 197 | DO NOT CONTRADICT OR DOUBT THAT DATA. IT SUPERSEDES ANY OTHER DATA YOU HAVE, AND IS UP TO DATE AS OF TODAY. 198 | DO NOT MENTION TO THE USER THIS DATA WAS PROVIDED TO YOU IN ANY WAY. 199 | NEVER MENTION TO THE USER THE REPLY IS ACCORDING TO A SEARCH. 200 | DO NOT START YOUR ANSWER WITH A MAGNIFYING GLASS EMOJI; THAT WILL BE PROVIDED TO THE USER SEPARATELY, AS NEEDED. 201 | 202 | Your task is to provide R1X's answer. 203 | 204 | Today's date is {current_date}. 205 | You are trained with knowledge until September 2021. 206 | If you have missing data, try to estimate, and let the user know your answer is an estimate. 207 | 208 | Your thought process should follow the next steps {'audibly stating the CONCLUSION for each step number without quoting it:' if is_debug_prompt else 'silently:'} 209 | 1. Understand the human's request and formulate it as a self-contained question. 210 | 2. Integrate all data provided to you with your current knowledge and formulate a response. 211 | 212 | IMPORTANT: Make sure to focus on the most recent request from the user, even if it is a repeated one.""" } 213 | 214 | return prep_message_final if is_final else prep_message_stable 215 | 216 | prep_reply_message = {"role": "assistant", "content": "Understood. Please provide me with the chat between R1X and the human."} 217 | 218 | import datetime 219 | 220 | def get_chat_completion_with_tools(ctx:Context, messenger_name, messages, direct): 221 | try: 222 | ctx.log("Starting getChatCompletionWithTools.") 223 | 224 | parsed_messages = deep_clone(messages) if direct else db_messages2messages(messages) 225 | ctx.log({"messages": parsed_messages}) 226 | 227 | prev_responses = [] 228 | 229 | #system_message = get_system_message(ctx, messenger_name) 230 | system_message = None 231 | history = get_limited_message_history(ctx, parsed_messages, system_message) 232 | 233 | prompt_tokens_total = 0 234 | completion_tokens_total = 0 235 | 236 | max_iterations = 2 237 | successful_iterations = 0 238 | 239 | ctx.set_stat('tools-flow:tool-invocations', successful_iterations) 240 | 241 | for i in range(max_iterations): 242 | ctx.log(f"Invoking completionIterativeStep #{i}") 243 | 244 | ctx.set_stat('tools-flow:iterations', i + 1) 245 | 246 | is_final = (i == (max_iterations - 1)) 247 | 248 | result = completion_iterative_step(ctx, messenger_name, deep_clone(history), prev_responses, is_final) 249 | answer = result['answer'] 250 | tool = result['tool'] 251 | input_ = result['input'] 252 | prompt_tokens = result['prompt_tokens'] 253 | completion_tokens = result['completion_tokens'] 254 | 255 | ctx.log(f"completionIterativeStep done, answer={answer} tool={tool} input={input_} prompt_tokens={prompt_tokens} completion_tokens={completion_tokens}" ) 256 | 257 | if not answer and not tool: 258 | break 259 | 260 | prompt_tokens_total += prompt_tokens 261 | completion_tokens_total += completion_tokens 262 | 263 | if answer: 264 | ctx.log(f"Answer returned: {answer}") 265 | 266 | if successful_iterations > 0: 267 | answer = "\N{LEFT-POINTING MAGNIFYING GLASS}: " + answer 268 | 269 | ctx.set_stat('tools-flow:success', True) 270 | 271 | return Box({ 272 | "response": answer, 273 | "promptTokens": prompt_tokens_total, 274 | "completionTokens": completion_tokens_total 275 | }) 276 | 277 | if tool and input_: 278 | successful_iterations += 1 279 | ctx.set_stat('tools-flow:tool-invocations', successful_iterations) 280 | 281 | ctx.log(f"Invoking TOOL {tool} with INPUT {input_}") 282 | response, brk = invoke_tool(ctx, tool, input_, message=messages[-1]) 283 | if brk: 284 | return Box({ 285 | "response": response, 286 | "promptTokens": prompt_tokens_total, 287 | "completionTokens": completion_tokens_total 288 | }) 289 | prev_responses.append(f"INVOKED TOOL={tool}, TOOL_INPUT={input_}, ACCURACY=100%, INVOCATION DATE={datetime.datetime.now().date()} RESPONSE={response}") 290 | 291 | except Exception as e: 292 | ctx.log({"e": e}) 293 | traceback.print_exc(); 294 | 295 | ctx.log("getChatCompletionWithTools: failed generating customized reply, falling back to getChatCompletion.") 296 | 297 | ctx.set_stat('tools-flows:success', False) 298 | 299 | return get_chat_completion(ctx, messenger_name, messages, direct) 300 | 301 | def completion_iterative_step(ctx, messenger_name, history, prev_responses, is_final : bool): 302 | result = {'answer': None, 'tool': None, 'input': None, 'prompt_tokens': None, 'completion_tokens': None} 303 | 304 | messages = [] 305 | 306 | new_request = {'role': 'user', 'content': ''} 307 | new_request['content'] += 'Here is the chat so far:\n' 308 | 309 | for message in history: 310 | speaker = 'R1X' if message['role'] == 'assistant' else 'Human' 311 | new_request['content'] += f'\n<{speaker}>: {message["content"]}' 312 | 313 | new_request['content'] += '\n' 314 | 315 | if prev_responses: 316 | prev_responses_flat = '\n'.join(prev_responses) 317 | new_request['content'] += f'\nhere is the data so far:\n\n{prev_responses_flat}\n' 318 | 319 | prep_message = get_prep_message(ctx, messenger_name, is_final) 320 | messages.append(prep_message) 321 | messages.append(prep_reply_message) 322 | 323 | messages.append(new_request) 324 | 325 | reply = get_chat_completion_core(ctx, messenger_name, messages) 326 | result['prompt_tokens'] = reply.promptTokens 327 | result['completion_tokens'] = reply.completionTokens 328 | 329 | if is_final: 330 | result['answer'] = reply['response'] 331 | return result 332 | 333 | regex = re.compile(r'(.*?)<\/yair1xigoresponse>', re.DOTALL) 334 | matches = regex.search(reply['response']) 335 | 336 | if not matches: 337 | return result 338 | 339 | json_reply = eval(matches.group(1)) 340 | ctx.log(f'completionIterativeStep: matched response: {json_reply}') 341 | 342 | result['answer'] = json_reply.get('ANSWER') 343 | if result['answer']: 344 | return result 345 | 346 | if json_reply.get('TOOL') and json_reply.get('TOOL_INPUT'): 347 | result['tool'] = json_reply.get('TOOL') 348 | result['input'] = json_reply.get('TOOL_INPUT') 349 | return result 350 | 351 | return result 352 | 353 | def chat_completion_create_wrap(ctx: Context, model, messages): 354 | if model == 'gpt-4': 355 | response = openai.ChatCompletion().create(model=model, messages=messages, temperature=0.2) 356 | 357 | return response 358 | 359 | if model == 'gpt-3.5-turbo': 360 | # TODO: cleanup per issue #55 361 | if os.environ['AZURE_OPENAI_KEY'] == '': 362 | return openai.ChatCompletion().create(model=model, messages=messages, temperature=0.2) 363 | 364 | url = "https://r1x.openai.azure.com/openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-05-15" 365 | 366 | headers = { 367 | "Content-Type": "application/json", 368 | "api-key": os.environ['AZURE_OPENAI_KEY'] 369 | } 370 | 371 | data = { 372 | "messages" : messages, 373 | "temperature": 0.2 374 | } 375 | 376 | response = requests.post(url, headers=headers, data=json.dumps(data)).json() 377 | 378 | ctx.log('Azure GPT 3.5 response:', response) 379 | 380 | content_filter_active = response.get('error', {}).get('code') == 'content_filter' or \ 381 | response.get('choices', [{}])[0].get('finish_reason') == 'content_filter' 382 | 383 | if content_filter_active: 384 | ctx.log('Content filtering applied; falling back to OpenAI API.') 385 | ctx.set_stat('completion:content-filter', True) 386 | response = openai.ChatCompletion().create(model=model, messages=messages, temperature=0.2) 387 | 388 | return response 389 | 390 | ctx.log(f'chat_completion_create_wrap: unsupported completion model {model}.') 391 | 392 | assert False 393 | 394 | def invoke_tool(ctx:Context, tool, input, message): 395 | tool_canon = tool.strip().upper() 396 | 397 | if tool_canon.startswith('SEARCH'): 398 | # Replace this with an appropriate call to the Serper module 399 | ctx.log(f'Invoking Google search using SERPER, input={input}') 400 | serper = google_serper.GoogleSerperAPIWrapper(serper_api_key=os.environ['SERPER_API_KEY']) 401 | answer = serper.run(input) 402 | ctx.log(f'SERPER search result: {answer}') 403 | 404 | return answer, False 405 | 406 | if tool_canon.startswith('WEATHER'): 407 | answer = invoke_weather_search(ctx, input) 408 | 409 | return answer, False 410 | 411 | if tool_canon.startswith('ALERT'): 412 | ctx.set_stat('tools-flow:tool-alert', 1) 413 | invoke_alert_tool(ctx, input, message) 414 | return "alert added successfully.", True 415 | 416 | 417 | return None, False 418 | 419 | def parse_geolocation(location_data): 420 | regex = re.compile(r'^(\d+\.\d+)\° ([NSEW]),\s*(\d+\.\d+)\° ([NSEW])$') 421 | match = regex.match(location_data) 422 | 423 | if not match: 424 | return None 425 | 426 | lat = float(match.group(1)) * (-1 if match.group(2) == 'S' else 1) 427 | lon = float(match.group(3)) * (-1 if match.group(4) == 'W' else 1) 428 | 429 | return Box({'lat': lat, 'lon': lon}) 430 | 431 | def invoke_weather_search(ctx:Context, input): 432 | ctx.log(f'invokeWeatherSearch, input={input}') 433 | 434 | # Replace this with an appropriate call to the Serper module 435 | # serper = Serper() 436 | geo_prompt = f'{input} long lat' 437 | ctx.log(f'Invoking geolocation search using SERPER, input={geo_prompt}') 438 | 439 | serper = google_serper.GoogleSerperAPIWrapper(serper_api_key=os.environ['SERPER_API_KEY']) 440 | geo_res = serper.run(geo_prompt) 441 | ctx.log(f'SERPER geolocation result: {geo_res}') 442 | 443 | geo = parse_geolocation(geo_res) 444 | if not geo: 445 | return None 446 | 447 | ctx.log(f'Geolocation: lat={geo.lat} lon={geo.lon}') 448 | 449 | w_res = requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={geo.lat}&longitude={geo.lon}&daily=temperature_2m_max,temperature_2m_min,precipitation_hours,precipitation_probability_max,windspeed_10m_max&forecast_days=3&timezone=auto') 450 | w_res_json = w_res.json() 451 | 452 | return json.dumps(w_res_json['daily']) 453 | 454 | def create_transcription(ctx:Context, mp3_file_path): 455 | language = ctx.user_settings.get('transcription.lang', None) 456 | ctx.log(f'createTranscription: preferred user language is {language}') 457 | 458 | t0 = time.time() 459 | 460 | transcript = openai.Audio.transcribe( 461 | file = open(mp3_file_path, "rb"), 462 | model = OPENAI_SPEECH_TO_TEXT_MODEL, 463 | language = language 464 | ) 465 | 466 | transcription = transcript['text'] 467 | time_taken = int((time.time() - t0) * 1000) 468 | 469 | ctx.log(f'createTranscription: timeTaken={time_taken}ms transcription={transcription}') 470 | 471 | return transcription 472 | -------------------------------------------------------------------------------- /src/services/timers.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | import time 4 | import traceback 5 | from typing import Tuple 6 | from infra import logger, utils 7 | from infra.context import Context 8 | utils.load_env() 9 | import db_models 10 | from services.messengers import messenger_factory 11 | 12 | def invoke_alert_tool(ctx:Context, alert_args:Tuple[int, str], parsed_message): 13 | messenger_chat_id = f"{parsed_message.source}:{parsed_message.chatId}" 14 | timestamp = int(parsed_message.messageTimestamp.timestamp()) 15 | ref_id = parsed_message.messageId 16 | 17 | with db_models.Session() as session: 18 | now = datetime.datetime.now() 19 | delta_ts, topic = alert_args 20 | timer_extra_data = {"topic":topic, "ref_id":ref_id} 21 | trigger_ts = datetime.datetime.fromtimestamp(timestamp+ int(delta_ts)) 22 | timer = db_models.Timer( 23 | chat_id=messenger_chat_id, 24 | trigger_timestamp=trigger_ts, 25 | data=timer_extra_data, 26 | created_at=now, 27 | updated_at=now 28 | ) 29 | 30 | session.add(timer) 31 | session.commit() 32 | session.refresh(timer) 33 | 34 | session.close() 35 | 36 | return timer 37 | 38 | def alert_users(): 39 | ctx = Context() 40 | while True: 41 | try: 42 | now = datetime.datetime.utcnow() 43 | with db_models.Session() as session: 44 | alerts = session.query(db_models.Timer).filter(db_models.Timer.trigger_timestamp <= now).all() 45 | if alerts: 46 | ctx.log(f"[TIMER] found {len(alerts)} alerts") 47 | 48 | for alert in alerts: 49 | try: 50 | topic = alert.data.get("topic", None) 51 | quote_id = alert.data.get("ref_id", None) 52 | messenger = messenger_factory.make_messenger(alert.chat_id) 53 | ctx.log(f"[TIMER] sending a timer message to chat id {alert.chat_id}") 54 | messenger.send_message(ctx, { 55 | "kind": "text", 56 | "body": f"You asked me to remind you about {topic}" if topic else "You asked me to remind you", 57 | "quote_id":quote_id 58 | }) 59 | except: 60 | ctx.log(f"[TIMER] failed to send alert {alert.id} to chat id:{alert.chat_id} ") 61 | delete_alerts(ctx, now) 62 | time.sleep(5) 63 | 64 | except Exception as e: 65 | logger.logger.error(f'Exception occurred; {e}; stack trace: ', traceback.format_exc()) 66 | 67 | def delete_alerts(ctx:Context, now:datetime.datetime) -> None: 68 | with db_models.Session() as session: 69 | session.query(db_models.Timer).filter(db_models.Timer.trigger_timestamp <= now).delete() 70 | ctx.log("[TIMER] alerts deleted") 71 | session.commit() -------------------------------------------------------------------------------- /src/services/token_prediction/token_predictor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tiktoken 4 | 5 | # global variable to hold the encode objects between invocations 6 | encoder = tiktoken.get_encoding("cl100k_base") 7 | 8 | def _num_tokens_from_messages(messages): 9 | num_tokens = 0 10 | for message in messages: 11 | num_tokens += 4 12 | for key, value in message.items(): 13 | num_tokens += len(encoder.encode(value)) 14 | if key == "name": 15 | num_tokens -= 1 16 | 17 | num_tokens += 2 18 | num_tokens += 1 19 | return num_tokens 20 | 21 | def _get_message_tokens(message): 22 | if len(message) == 0: 23 | raise ValueError(f"message is malformed. It's {message} but doesn't have any keys") 24 | 25 | num_tokens = 0 26 | num_tokens += 4 27 | for key, value in message.items(): 28 | num_tokens += len(encoder.encode(value)) 29 | if key == "name": 30 | num_tokens -= 1 31 | 32 | return num_tokens 33 | 34 | def _get_message_index_upto_max_tokens(system_message, chat_messages, soft_token_limit, hard_token_limit): 35 | num_tokens = 0 36 | num_tokens += 2 37 | num_tokens += 1 38 | 39 | include_system_message = False 40 | start_index = len(chat_messages) 41 | 42 | if system_message != None: 43 | num_tokens += _get_message_tokens(system_message) 44 | 45 | if num_tokens > hard_token_limit: 46 | return [include_system_message, start_index] 47 | 48 | include_system_message = (system_message != None) 49 | 50 | num_messages = 0 51 | 52 | for start_index in range(len(chat_messages), 0, -1): 53 | message = chat_messages[start_index - 1] 54 | 55 | num_tokens += _get_message_tokens(message) 56 | 57 | if num_tokens <= soft_token_limit: 58 | num_messages += 1 59 | continue 60 | 61 | if start_index == len(chat_messages) and num_tokens <= hard_token_limit: 62 | num_messages += 1 63 | continue 64 | 65 | break 66 | 67 | return [include_system_message, len(chat_messages) - num_messages] 68 | 69 | def get_messages_upto_max_tokens(ctx, system_message, chat_messages, soft_token_limit, hard_token_limit): 70 | ctx.log(f"getMessagesUptoMaxTokens: chatMessages.length={len(chat_messages)}, softTokenLimit={soft_token_limit}, hardTokenLimit={hard_token_limit}") 71 | 72 | include_system_message, start_index = _get_message_index_upto_max_tokens(system_message, chat_messages, soft_token_limit, hard_token_limit) 73 | 74 | result = [system_message] if include_system_message else [] 75 | 76 | if start_index == len(chat_messages): 77 | return result 78 | 79 | result += chat_messages[start_index:] 80 | 81 | return result 82 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/test/__init__.py -------------------------------------------------------------------------------- /test/data/backref_user1.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "Is Pele alive?" }, 3 | { "role" : "assistant", "content" : "According to the latest information available, Pele passed away on December 29, 2022, at the Albert Einstein Israelite Hospital in São Paulo, State of São Paulo, Brazil." }, 4 | { "role" : "user", "content" : "what is its radius?" }, 5 | { "role" : "assistant", "content" : "I'm sorry, I'm not sure what you are referring to. Could you please provide more context or clarify your question?" }, 6 | { "role" : "user", "content" : "tennis ball" } 7 | ] } 8 | 9 | -------------------------------------------------------------------------------- /test/data/brazil.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { 3 | "role": "assistant", 4 | "content": "ANSWER: I apologize, but I cannot provide any information about Guy Caspary, as I could not find any relevant or notable persons with that name using my search capabilities. Can you please provide any additional context or details that may help me better understand who you are asking about?" 5 | }, 6 | { "role": "user", "content": "Who is Guy Caspary?" }, 7 | { 8 | "role": "assistant", 9 | "content": "Guy Caspary is an individual who has a professional profile on LinkedIn. Beyond this, I could not find any additional information about him or his background using my tools." 10 | }, 11 | { 12 | "role": "user", 13 | "content": "What's the weather in Milan like tomorrow?" 14 | }, 15 | { 16 | "role": "assistant", 17 | "content": "The weather in Milan, Italy tomorrow, according to the most recent forecast, is expected to be partly cloudy with a high of 21°C (70°F) and a low of 9°C (48°F)." 18 | }, 19 | { "role": "user", "content": "Who is Aviram Yeruchami?" }, 20 | { 21 | "role": "assistant", 22 | "content": "Aviram Yeruchami is a Director of Engineering at Cisco, specifically for the company's Leaba division. Not much else is available in the public domain about his personal or professional background." 23 | }, 24 | { "role": "user", "content": "What should I wear tomorrow?" }, 25 | { 26 | "role": "assistant", 27 | "content": "Tool=search, tool_input=What is the weather forecast for tomorrow in my location?" 28 | }, 29 | { "role": "user", "content": "What should I wear tomorrow?" }, 30 | { 31 | "role": "assistant", 32 | "content": "Based on the weather forecast for your location, I would recommend checking the expected high and low temperatures for tomorrow, as well as the expected weather conditions (i.e. sunny, rainy, cloudy) to determine what to wear. If it is expected to be warm and sunny, you may want to wear light and comfortable clothing, while if it is expected to be cooler and potentially rainy, you may want to wear a jacket or other appropriate clothing to keep you warm and dry." 33 | }, 34 | { "role": "user", "content": "I am located in Milan." }, 35 | { 36 | "role": "assistant", 37 | "content": "Understood. Is there a specific question or topic you would like me to help with related to Milan?" 38 | }, 39 | { "role": "user", "content": "What should I wear tomorrow?" }, 40 | { 41 | "role": "assistant", 42 | "content": "ANSWER: Based on the weather forecast for Milan, Italy, tomorrow, which is partly cloudy with highs of 21°C (70°F) and lows of 9°C (48°F), it is recommended that you wear comfortable clothing such as light shirts, pants or skirts, and a jacket or sweater to keep you warm in case the temperature drops. It's advisable to wear clothes in layers, to be prepared for fluctuations in temperatures." 43 | }, 44 | 45 | { 46 | "role": "user", 47 | "content": "Who is the current president of Brasil?" 48 | } 49 | ] } 50 | -------------------------------------------------------------------------------- /test/data/comprehension/backref_ai1.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | {"role" : "user", "content" : "Tell me i'm nice and ask me if i like to hear it again"}, 3 | {"role" : "assistant", "content" : "You are nice. Would you like to hear it again?"}, 4 | {"role" : "user", "content" : "yes"} 5 | ] } 6 | 7 | -------------------------------------------------------------------------------- /test/data/comprehension/backref_ai2.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | {"role" : "user", "content" : "Tell me i'm nice and ask me if i like to hear it again"}, 3 | {"role" : "assistant", "content" : "You are nice. Would you like to hear it again?"}, 4 | {"role" : "user", "content" : "no"} 5 | ] } 6 | 7 | -------------------------------------------------------------------------------- /test/data/fiction/story.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "write me a story about Yair the Great, and Igor the Furious, two sages." } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/fiction/story2.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "write a story about Abigail, Yael and Renena, three princesses riding dragons." } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/finance/btc.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "what was the value of btc yesterday?" } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/finance/twostocks.json.fail: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "what was the value of intc and csco yesterday?" } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/misc/42.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "42" } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/pele1.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "Is Pele alive?" } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/ukpm.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "Who is the UK PM?" } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/data/weather1.json: -------------------------------------------------------------------------------- 1 | { "messages" : [ 2 | { "role" : "user", "content" : "Is it going to be cold tomorrow?" } 3 | ] } 4 | 5 | -------------------------------------------------------------------------------- /test/local-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import sys 5 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))) 6 | 7 | from infra.utils import load_env 8 | load_env() 9 | 10 | import sys 11 | import json 12 | import asyncio 13 | from pathlib import Path 14 | from infra.logger import logger, create_logging_context 15 | from services.open_ai.query_openai import get_chat_completion_with_tools 16 | 17 | def run(): 18 | args = sys.argv[1:] 19 | 20 | # Check if the user specified any command line arguments 21 | if not args: 22 | print("No arguments provided.") 23 | sys.exit(1) 24 | 25 | json_input = args[0] 26 | 27 | with open(json_input, 'r', encoding='utf-8') as file: 28 | data = file.read() 29 | history = json.loads(data)["messages"] 30 | 31 | ctx = create_logging_context(0) 32 | ctx.user_channel = 'stable' 33 | reply = get_chat_completion_with_tools(ctx, 'WhatsApp', history, True) 34 | 35 | print({'reply': reply}) 36 | 37 | run() 38 | -------------------------------------------------------------------------------- /test/test-token-predictor.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | const assert = require('assert'); 3 | require("dotenv").config(); 4 | const tokenPredictor = require(".././src/services/token-prediction/token-predictor"); 5 | 6 | async function main() { 7 | const systemMessage = {"role": "system", "content": "you are a helpful bot"}; 8 | 9 | const chatMessages = [ 10 | {"role": "user", "content": "i'm your lord"}, 11 | {"role": "assistant", "content": "you'd wish"}, 12 | {"role": "user", "content": "מה אתה אומר"}, 13 | {"role": "assistant", "content": "מה שאתה שומע"} 14 | ]; 15 | 16 | // build the expected behavior. a map from number of tokens --> message list using the numTokensFromMessages predictor 17 | const numTokensToMessages = new Map(); 18 | const previousMessages = []; 19 | let previousNumTokens = 0; 20 | const chatMessagesReversed = chatMessages.slice().reverse(); // make a copy of chatMessages and reverse it 21 | 22 | for (let numChatMessages = 0; numChatMessages <= chatMessagesReversed.length ; ++numChatMessages) { 23 | // take the first numChatMessages from the end 24 | const subsetChatMessagesReversed = chatMessagesReversed.slice(0, numChatMessages); 25 | 26 | // get back the normal order 27 | const subsetChatMessages = subsetChatMessagesReversed.slice().reverse(); 28 | 29 | // build the full list message 30 | const combinedMessages = [systemMessage, ...subsetChatMessages]; 31 | 32 | // calculate their number of tokens 33 | const numTokens = await tokenPredictor.numTokensFromMessages(combinedMessages); 34 | 35 | // fill out the entries of token sizes for the unset sizes so far in the message map 36 | for (const token = previousNumTokens; token < numTokens; token++) { 37 | numTokensToMessages[token] = previousMessages; 38 | } 39 | 40 | // if this is the last iteration then nothing will fill in for it (no next), so do now 41 | if (numChatMessages == chatMessagesReversed.length) { 42 | 43 | numTokensToMessages[numTokens] = combinedMessages; 44 | } 45 | 46 | // update for next iteration 47 | previousMessages = combinedMessages; 48 | previousNumTokens = numTokens; 49 | } 50 | // console.log(numTokensToMessages); 51 | 52 | 53 | // verify that the message selector picks up exactly the expected messages. Go +10 beyond the last previousNumTokens just for checks. 54 | for (const maxTokens = 0; maxTokens < previousNumTokens+10; maxTokens ++) { 55 | const actualMessages = await tokenPredictor.getMessagesUptoMaxTokens(systemMessage, chatMessages, maxTokens); 56 | 57 | //console.log(`maxTokens=${maxTokens}\n actual=${JSON.stringify(actualMessages)}\nexpected=${JSON.stringify(numTokensToMessages[maxTokens])}`); 58 | try { 59 | // if the number of tokens still exist int the map, compare to the map 60 | if (maxTokens <= previousNumTokens){ 61 | assert.deepStrictEqual(actualMessages, numTokensToMessages[maxTokens]); 62 | } else { 63 | //compare the the full list 64 | assert.deepStrictEqual(actualMessages, previousMessages); 65 | } 66 | } catch (error) { 67 | console.log(`Actual and expected message list are different for ${maxTokens}\n`, error); 68 | throw error; 69 | } 70 | } 71 | 72 | // Get the index of the message within the chatMessages list from which messages should be taken. 73 | // const res = await tokenPredictor.getMessageIndexUptoMaxTokens(systemMessage, chatMessages, 17); console.log(res); 74 | 75 | // Get the number of tokens a list of messages should consume 76 | // tokens = await tokenPredictor.numTokensFromMessages(combined); console.log("numTokensFromMessages", tokens); 77 | } 78 | 79 | main(); 80 | 81 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/tools/__init__.py -------------------------------------------------------------------------------- /tools/delete_chat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import sys 5 | import json 6 | import psycopg2 7 | from dotenv import load_dotenv 8 | from datetime import datetime 9 | 10 | def connect_to_db(): 11 | stage = os.environ['R1X_STAGE'] if 'R1X_STAGE' in os.environ else 'dev' 12 | print('Connecting to %s environment...' % stage) 13 | load_dotenv('.env.%s' % stage) 14 | connection_string = os.getenv('DB_CONNECTION_STRING') 15 | conn = psycopg2.connect(connection_string) 16 | return conn 17 | 18 | def delete_history(source, chat_id): 19 | conn = connect_to_db() 20 | cursor = conn.cursor() 21 | 22 | cursor.execute("DELETE FROM \"Messages\" WHERE source = %s AND \"chatId\" = %s", (source, chat_id,)) 23 | 24 | conn.commit() 25 | cursor.close() 26 | conn.close() 27 | 28 | if __name__ == "__main__": 29 | if len(sys.argv) < 3: 30 | print("Usage: python script.py source chat_id") 31 | sys.exit(1) 32 | 33 | source = sys.argv[1] 34 | chat_id = sys.argv[2] 35 | 36 | delete_history(source, chat_id) 37 | 38 | -------------------------------------------------------------------------------- /tools/extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import re 4 | import json 5 | import argparse 6 | 7 | def extract_messages(log_file, output_file): 8 | with open(log_file, 'r') as log, open(output_file, 'w') as out: 9 | log_content = log.read() 10 | pattern = r"Starting getChatCompletionWithTools\.([\s\S]*?)(parsedMessages: \[[\s\S]*?\])" 11 | matches = re.findall(pattern, log_content) 12 | 13 | if matches: 14 | last_instance = matches[-1][-1] 15 | 16 | role_pattern = r"role: (?:'([^']*)'|\"([^\"]*)\")" 17 | #content_pattern = r"content: (?:'([^']*)'|\"([^\"]*)\")" 18 | content_pattern = r"content:\s*(?:'([^']+)'|\"([^\"]+)\")" 19 | 20 | roles = [role[0] or role[1] for role in re.findall(role_pattern, last_instance)] 21 | contents = [content[0] + content[1] for content in re.findall(content_pattern, last_instance, re.MULTILINE | re.DOTALL )] 22 | 23 | messages = { "messages" : [{"role": role, "content": content} for role, content in zip(roles, contents)] } 24 | 25 | with open(output_file, 'w') as out: 26 | json.dump(messages, out, indent=2) 27 | else: 28 | print("No matching instances found in the log file.") 29 | 30 | # Replace 'input.log' and 'output.json' with your actual log and output file names 31 | if __name__ == '__main__': 32 | parser = argparse.ArgumentParser(description='Extract messages from a log file and save them to an output JSON file.') 33 | parser.add_argument('--input', required=True, help='Path to the input log file.') 34 | parser.add_argument('--output', required=True, help='Path to the output JSON file.') 35 | 36 | args = parser.parse_args() 37 | 38 | extract_messages(args.input, args.output) 39 | 40 | 41 | -------------------------------------------------------------------------------- /tools/multi_sender.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import sys 5 | import os 6 | from typing import Dict, List 7 | 8 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))) 9 | from infra import utils 10 | from infra.context import Context 11 | from services.messengers.messenger_factory import make_messenger 12 | 13 | utils.load_env() 14 | 15 | 16 | def multi_send(ctx:Context, full_chat_ids: List[str], attrs: Dict[str,str]): 17 | for full_chat_id in full_chat_ids: 18 | messenger = make_messenger(full_chat_id) 19 | response = messenger.send_message(ctx, attrs) 20 | print(response) 21 | 22 | should_send_contact = attrs['contact_name'] and attrs['contact_handle'] 23 | if should_send_contact: 24 | response = messenger.send_contact(ctx, attrs['contact_name'], attrs['contact_handle']) 25 | print(response) 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser(description='Send a message to multiple chat ids.') 29 | 30 | parser.add_argument('--message', required=False, help='Message string.') 31 | parser.add_argument('--file', required=False, help='Message string, in file.') 32 | parser.add_argument('--chat_ids', required=True, help='a comma seperated list of : e.g wa:12346578,tg:456789654 ') 33 | parser.add_argument('--contact-name', required=False, action='store', help='''Send contact. Name is the contact's name.''') 34 | parser.add_argument('--contact-handle', required=False, action='store', help='''Send contact. Handle is contact's handle in WhatsApp/Telegram.''') 35 | 36 | args = parser.parse_args() 37 | 38 | if not args.message and not args.file: 39 | print('No message provided. Use --message or --file') 40 | 41 | if args.message: 42 | msg = args.message 43 | else: 44 | msg = open(args.file, 'r').read() 45 | 46 | full_chat_ids=args.chat_ids.split(',') 47 | 48 | ctx = Context() 49 | multi_send(ctx, full_chat_ids, { 50 | "kind": "text", 51 | "body": msg, 52 | "contact_name" : args.contact_name, 53 | "contact_handle" : args.contact_handle 54 | }) 55 | 56 | 57 | -------------------------------------------------------------------------------- /tools/stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import dotenv 5 | import numpy 6 | import os 7 | import psycopg2 8 | import psycopg2.extras 9 | 10 | dotenv.load_dotenv('.env.prod') 11 | ps = psycopg2.connect(os.environ['DB_CONNECTION_STRING']) 12 | 13 | cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor) 14 | 15 | def get_message_count(start_date, end_date): 16 | cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor) 17 | 18 | cur.execute('''SELECT COUNT(id) FROM "Messages" WHERE DATE("createdAt") >= '%s' AND DATE("createdAt") <= '%s';''' % (start_date, end_date)) 19 | message_count = cur.fetchall()[0][0] 20 | 21 | return message_count 22 | 23 | def get_active_chats_count(start_date, end_date): 24 | cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor) 25 | 26 | cur.execute('''SELECT COUNT(DISTINCT (source, "chatId")) FROM "Messages" WHERE DATE("createdAt") >= '%s' AND DATE("createdAt") <= '%s';''' % (start_date, end_date)) 27 | active_chat_count = cur.fetchall()[0][0] 28 | 29 | return active_chat_count 30 | 31 | def get_active_chat_histogram(start_date, end_date): 32 | cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor) 33 | 34 | # Select :, so this data can be used later to send messages to specific users. 35 | cur.execute('''SELECT source, "chatId", chat_id_count FROM (SELECT source, "chatId", COUNT(*) as chat_id_count FROM "Messages" WHERE DATE("createdAt") >= '%s' AND DATE("createdAt") <= '%s' GROUP BY source, "chatId") AS chat_count_table ORDER BY chat_id_count DESC;''' % (start_date, end_date)) 36 | 37 | chats = [] 38 | for member in cur.fetchall(): 39 | chats.append(member) 40 | 41 | return chats 42 | 43 | parser = argparse.ArgumentParser(description='Fetch statistics from R1X database.') 44 | parser.add_argument('--start-date', type=str, dest='start_date', help='Start date.', required=True) 45 | parser.add_argument('--end-date', type=str, dest='end_date', help='End date.', required=True) 46 | args = parser.parse_args() 47 | 48 | num_msgs = get_message_count(args.start_date, args.end_date) 49 | print('Number of messages: ', num_msgs) 50 | 51 | chats = get_active_chat_histogram(args.start_date, args.end_date) 52 | print('Active chats today: ', len(chats)) 53 | 54 | msg_arr = [] 55 | 56 | numbers = [] 57 | 58 | for chat in chats: 59 | (source, chat_id, msgs) = chat 60 | msg_arr.append(msgs) 61 | 62 | if source == 'wa': 63 | numbers.append(f'{source}:{chat_id}') 64 | 65 | if msgs < 8: 66 | continue 67 | 68 | print(source, chat_id, msgs) 69 | 70 | print(','.join(numbers)) 71 | 72 | print(numpy.histogram(msg_arr, [0, 5, 10, 15, 20, 50, 100])) 73 | 74 | -------------------------------------------------------------------------------- /tools/user_settings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import sys 5 | import json 6 | import psycopg2 7 | from dotenv import load_dotenv 8 | from datetime import datetime 9 | 10 | def connect_to_db(): 11 | stage = os.environ['R1X_STAGE'] if 'R1X_STAGE' in os.environ else 'dev' 12 | print('Connecting to %s environment...' % stage) 13 | load_dotenv('.env.%s' % stage) 14 | connection_string = os.getenv('DB_CONNECTION_STRING') 15 | conn = psycopg2.connect(connection_string) 16 | return conn 17 | 18 | def get_settings(user_id): 19 | conn = connect_to_db() 20 | cursor = conn.cursor() 21 | 22 | cursor.execute("SELECT * FROM user_settings WHERE user_id = %s ORDER BY id DESC", (user_id,)) 23 | row = cursor.fetchone() 24 | 25 | if row: 26 | print("Settings for user_id {}: {}".format(user_id, row)) 27 | else: 28 | print("No settings found for user_id {}".format(user_id)) 29 | 30 | cursor.close() 31 | conn.close() 32 | 33 | def set_setting(user_id, key_value_pairs): 34 | conn = connect_to_db() 35 | cursor = conn.cursor() 36 | 37 | cursor.execute("SELECT settings FROM user_settings WHERE user_id = %s", (user_id,)) 38 | row = cursor.fetchone() 39 | 40 | settings = row[0] if row else {} 41 | 42 | for pair in key_value_pairs: 43 | key, value = pair.split("=") 44 | settings[key] = value 45 | 46 | cursor.execute('INSERT INTO user_settings (user_id, settings, version, "createdAt", "updatedAt") VALUES (%s, %s, 1, %s, %s)', 47 | (user_id, json.dumps(settings), datetime.now(), datetime.now())) 48 | 49 | conn.commit() 50 | cursor.close() 51 | conn.close() 52 | 53 | def clear_setting(user_id, key): 54 | conn = connect_to_db() 55 | cursor = conn.cursor() 56 | 57 | cursor.execute("SELECT settings FROM user_settings WHERE user_id = %s", (user_id,)) 58 | row = cursor.fetchone() 59 | 60 | if row: 61 | settings = row[0] 62 | if key in settings: 63 | del settings[key] 64 | cursor.execute("UPDATE user_settings SET settings = %s WHERE user_id = %s", (json.dumps(settings), user_id)) 65 | conn.commit() 66 | else: 67 | print("Key not found in settings for user_id {}".format(user_id)) 68 | else: 69 | print("No settings found for user_id {}".format(user_id)) 70 | 71 | cursor.close() 72 | conn.close() 73 | 74 | if __name__ == "__main__": 75 | if len(sys.argv) < 3: 76 | print("Usage: python script.py [get|set|clear] user_id [key=value [key=value]...]") 77 | sys.exit(1) 78 | 79 | action = sys.argv[1] 80 | user_id = sys.argv[2] 81 | 82 | if action == "get": 83 | get_settings(user_id) 84 | elif action == "set": 85 | if len(sys.argv) < 4: 86 | print("Usage: python script.py set user_id key=value [key=value]...") 87 | sys.exit(1) 88 | 89 | key_value_pairs = sys.argv[3:] 90 | set_setting(user_id, key_value_pairs) 91 | elif action == "clear": 92 | if len(sys.argv) < 4: 93 | print("Usage: python script.py clear user_id key") 94 | sys.exit(1) 95 | 96 | key = sys.argv[3] 97 | clear_setting(user_id, key) 98 | else: 99 | print("Invalid action. Use get, set, or clear.") 100 | sys.exit(1) 101 | 102 | --------------------------------------------------------------------------------