├── .env.example
├── .gitignore
├── .nvmrc
├── .vscode
    └── launch.json
├── INSTALL
├── LICENSE
├── README.md
├── SETUP.md
├── alembic.ini
├── alembic
    ├── README
    ├── env.py
    ├── script.py.mako
    └── versions
    │   ├── 05e95b22503f_initial_migration.py
    │   ├── 7a4408168dda_add_events_table.py
    │   └── 8a6746b2ce16_add_timers_table.py
├── requirements.pip
├── src
    ├── __init__.py
    ├── db_models.py
    ├── infra
    │   ├── __init__.py
    │   ├── context.py
    │   ├── logger.py
    │   └── utils.py
    ├── message_handler.py
    ├── run.py
    └── services
    │   ├── message_db.py
    │   ├── messengers
    │       ├── __init__.py
    │       ├── messenger.py
    │       ├── messenger_factory.py
    │       ├── tg.py
    │       └── wa.py
    │   ├── open_ai
    │       ├── __init__.py
    │       └── query_openai.py
    │   ├── timers.py
    │   └── token_prediction
    │       └── token_predictor.py
├── test
    ├── __init__.py
    ├── data
    │   ├── backref_user1.json
    │   ├── brazil.json
    │   ├── comprehension
    │   │   ├── backref_ai1.json
    │   │   └── backref_ai2.json
    │   ├── fiction
    │   │   ├── story.json
    │   │   └── story2.json
    │   ├── finance
    │   │   ├── btc.json
    │   │   └── twostocks.json.fail
    │   ├── misc
    │   │   └── 42.json
    │   ├── pele1.json
    │   ├── ukpm.json
    │   └── weather1.json
    ├── local-test.py
    └── test-token-predictor.js
└── tools
    ├── __init__.py
    ├── delete_chat.py
    ├── extract.py
    ├── multi_sender.py
    ├── stats.py
    └── user_settings.py


/.env.example:
--------------------------------------------------------------------------------
 1 | # Mandatory settings
 2 | 
 3 | ## OpenAI API key
 4 | OPENAI_API_KEY=
 5 | 
 6 | ## Telegram bot token, acquired from the BotFather
 7 | TELEGRAM_BOT_TOKEN=
 8 | 
 9 | ## Telegram bot name, used when @mention-ing it
10 | TELEGRAM_BOT_NAME=
11 | 
12 | ## Serper key, used for searching Google via serper.dev
13 | SERPER_API_KEY=
14 | 
15 | # Optional settings
16 | AZURE_OPENAI_KEY=
17 | 
18 | FACEBOOK_GRAPH_VERSION=
19 | WHATSAPP_BOT_TOKEN=
20 | WHATSAPP_PHONE_NUMBER_ID=
21 | WHATSAPP_PHONE_NUMBER=
22 | 
23 | DB_CONNECTION_STRING=
24 | SQS_QUEUE_URL=
25 | 
26 | DREAMSTUDIO_API_KEY=
27 | 
28 | POSTHOG_API_KEY=
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | .env.dev
3 | .env.prod
4 | handleMessages.zip
5 | logs/
6 | __pycache__/
7 | .vscode/
8 | 


--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | v16.19
2 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |         {
 8 |             "name": "Python: Current File",
 9 |             "type": "python",
10 |             "request": "launch",
11 |             "program": "${file}",
12 |             "args": ["test/data/ukpm.json"]
13 |             "console": "integratedTerminal",
14 |             "justMyCode": true,
15 |             "env": {
16 |                 "PYTHONPATH" : "."
17 |             }
18 |         }
19 |     ]
20 | }


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
 1 | Building Python 3.11.3
 2 | ----------------------
 3 | apt-get install wget build-essential libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev libffi-dev zlib1g-dev liblzma-dev -y
 4 | 
 5 | cd /tmp
 6 | 
 7 | wget https://www.python.org/ftp/python/3.11.3/Python-3.11.3.tgz
 8 | tar zxf https://www.python.org/ftp/python/3.11.3/Python-3.11.3.tgz
 9 | 
10 | cd Python-3.11.3
11 | ./configure --enable-optimizations
12 | 
13 | make -j 4
14 | 
15 | make altinstall
16 | 
17 | update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.9 1
18 | 
19 | python3 -m pip install --upgrade pip
20 | 
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Yair Lifshitz, Eli Siegel, Igor Shumsky, Danny Sivan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Robot 1-X: your friendly AI assistant
 2 | Robot 1-X bot is your friendly AI assistant, available via WhatsApp and Telegram.
 3 | 
 4 | WhatsApp: https://wa.me/16692001022
 5 | 
 6 | Telegram: https://t.me/robot1xbot
 7 | 
 8 | 
 9 | Support: https://wa.me/16692221028
10 | 
11 | # What can Robot 1-X do?
12 | ## WhatsApp and Telegram ##
13 | - Answer questions and have a discussion, using GPT-3.5 or GPT-4
14 | - Write stories, plan trips, and tutor
15 | - Tools: search Google
16 | - Voice: answer recorded messages
17 | - Transcription: forward a voice message to Robot 1-X, and he will send you back the transcription
18 | 
19 | ## Telegram only ##
20 | - Attend group chats!
21 |   - Transcribes EVERY voice message, so you can read instead of listening
22 |   - Responds only when @mention-d as beginning of message
23 | 
24 | ## Initial setup
25 | 
26 | Follow the instructions in SETUP.md.
27 | 
28 | ## Local run
29 | 
30 | After executing all instructions in SETUP.md, you can launch Robot 1-X with your private Telegram bot by typing:
31 | 
32 | ```env R1X_STAGE=dev-local ./src/run.py```
33 | 
34 | Messages are stored locally.
35 | 


--------------------------------------------------------------------------------
/SETUP.md:
--------------------------------------------------------------------------------
 1 | # Robot 1-X dependencies
 2 | 
 3 | At a minimum, you need to have the following available to launch Robot 1-X locally:
 4 | 
 5 | - Python packages installed
 6 | - .env.dev-local settings file
 7 | - Telegram bot token
 8 | - OpenAI API key 
 9 | - serper.dev API token
10 | 
11 | ## Python packages
12 | 
13 | Robot 1-X is tested on Python 3.11.
14 | He may or may not operate on older Python versions.
15 | 
16 | To install all dependencies, execute:
17 | 
18 | ```pip3 install -r requirements.pip```
19 | 
20 | ## .env.dev-local settings file
21 | 
22 | Copy .env.example to .env.dev-local.
23 | 
24 | ## Telegram bot
25 | 
26 | Testing Robot 1-X requires, at a minimum, a Telegram bot.
27 | You will need to create a bot, then update your bot's token and name under .env.dev-local.
28 | The process takes about 2 minutes.
29 | 
30 | Creating a Telegram bot: https://core.telegram.org/bots/tutorial#obtain-your-bot-token.
31 | 
32 | After creating the bot, update the *TELEGRAM_BOT_TOKEN* and *TELEGRAM_BOT_NAME* parameters in .env.dev-local.
33 | 
34 | ## OpenAI
35 | 
36 | Create an account with OpenAI: https://platform.openai.com.
37 | 
38 | Then, create an OpenAI API key, here: https://platform.openai.com/account/api-keys
39 | 
40 | Update *OPENAI_API_KEY* in .env.dev-local.
41 | 
42 | ## serper.dev
43 | 
44 | Serper provides you with programmatic access to Google Search, which Robot 1-X relies on.
45 | Go to https://serper.dev, sign up, and get your API token.
46 | 
47 | Update *SERPER_API_KEY* in .env.dev-local.
48 | 
49 | 


--------------------------------------------------------------------------------
/alembic.ini:
--------------------------------------------------------------------------------
  1 | # A generic, single database configuration.
  2 | 
  3 | [alembic]
  4 | # path to migration scripts
  5 | script_location = alembic
  6 | 
  7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
  8 | # Uncomment the line below if you want the files to be prepended with date and time
  9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
 10 | # for all available tokens
 11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
 12 | 
 13 | # sys.path path, will be prepended to sys.path if present.
 14 | # defaults to the current working directory.
 15 | prepend_sys_path = .
 16 | 
 17 | # timezone to use when rendering the date within the migration file
 18 | # as well as the filename.
 19 | # If specified, requires the python-dateutil library that can be
 20 | # installed by adding `alembic[tz]` to the pip requirements
 21 | # string value is passed to dateutil.tz.gettz()
 22 | # leave blank for localtime
 23 | # timezone =
 24 | 
 25 | # max length of characters to apply to the
 26 | # "slug" field
 27 | # truncate_slug_length = 40
 28 | 
 29 | # set to 'true' to run the environment during
 30 | # the 'revision' command, regardless of autogenerate
 31 | # revision_environment = false
 32 | 
 33 | # set to 'true' to allow .pyc and .pyo files without
 34 | # a source .py file to be detected as revisions in the
 35 | # versions/ directory
 36 | # sourceless = false
 37 | 
 38 | # version location specification; This defaults
 39 | # to alembic/versions.  When using multiple version
 40 | # directories, initial revisions must be specified with --version-path.
 41 | # The path separator used here should be the separator specified by "version_path_separator" below.
 42 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
 43 | 
 44 | # version path separator; As mentioned above, this is the character used to split
 45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
 46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
 47 | # Valid values for version_path_separator are:
 48 | #
 49 | # version_path_separator = :
 50 | # version_path_separator = ;
 51 | # version_path_separator = space
 52 | version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
 53 | 
 54 | # set to 'true' to search source files recursively
 55 | # in each "version_locations" directory
 56 | # new in Alembic version 1.10
 57 | # recursive_version_locations = false
 58 | 
 59 | # the output encoding used when revision files
 60 | # are written from script.py.mako
 61 | # output_encoding = utf-8
 62 | 
 63 | sqlalchemy.url = driver://user:pass@localhost/dbname
 64 | 
 65 | 
 66 | [post_write_hooks]
 67 | # post_write_hooks defines scripts or Python functions that are run
 68 | # on newly generated revision scripts.  See the documentation for further
 69 | # detail and examples
 70 | 
 71 | # format using "black" - use the console_scripts runner, against the "black" entrypoint
 72 | # hooks = black
 73 | # black.type = console_scripts
 74 | # black.entrypoint = black
 75 | # black.options = -l 79 REVISION_SCRIPT_FILENAME
 76 | 
 77 | # Logging configuration
 78 | [loggers]
 79 | keys = root,sqlalchemy,alembic
 80 | 
 81 | [handlers]
 82 | keys = console
 83 | 
 84 | [formatters]
 85 | keys = generic
 86 | 
 87 | [logger_root]
 88 | level = WARN
 89 | handlers = console
 90 | qualname =
 91 | 
 92 | [logger_sqlalchemy]
 93 | level = WARN
 94 | handlers =
 95 | qualname = sqlalchemy.engine
 96 | 
 97 | [logger_alembic]
 98 | level = INFO
 99 | handlers =
100 | qualname = alembic
101 | 
102 | [handler_console]
103 | class = StreamHandler
104 | args = (sys.stderr,)
105 | level = NOTSET
106 | formatter = generic
107 | 
108 | [formatter_generic]
109 | format = %(levelname)-5.5s [%(name)s] %(message)s
110 | datefmt = %H:%M:%S
111 | 


--------------------------------------------------------------------------------
/alembic/README:
--------------------------------------------------------------------------------
1 | Generic single-database configuration.


--------------------------------------------------------------------------------
/alembic/env.py:
--------------------------------------------------------------------------------
 1 | from logging.config import fileConfig
 2 | 
 3 | import os
 4 | import dotenv
 5 | 
 6 | from sqlalchemy import engine_from_config
 7 | from sqlalchemy import pool
 8 | 
 9 | from alembic import context
10 | 
11 | 
12 | # this is the Alembic Config object, which provides
13 | # access to the values within the .ini file in use.
14 | config = context.config
15 | 
16 | STAGE = os.environ.get('R1X_STAGE', 'dev')
17 | dotenv.load_dotenv(f'.env.{STAGE}')
18 | 
19 | config.set_main_option(
20 |     "sqlalchemy.url", os.environ["DB_CONNECTION_STRING"]
21 | )
22 | 
23 | # Interpret the config file for Python logging.
24 | # This line sets up loggers basically.
25 | if config.config_file_name is not None:
26 |     fileConfig(config.config_file_name)
27 | 
28 | # add your model's MetaData object here
29 | # for 'autogenerate' support
30 | # from myapp import mymodel
31 | # target_metadata = mymodel.Base.metadata
32 | from sqlalchemy import MetaData
33 | import src.db_models
34 | target_metadata = src.db_models.Base.metadata
35 | #target_metadata = None
36 | 
37 | # other values from the config, defined by the needs of env.py,
38 | # can be acquired:
39 | # my_important_option = config.get_main_option("my_important_option")
40 | # ... etc.
41 | 
42 | 
43 | def run_migrations_offline() -> None:
44 |     """Run migrations in 'offline' mode.
45 | 
46 |     This configures the context with just a URL
47 |     and not an Engine, though an Engine is acceptable
48 |     here as well.  By skipping the Engine creation
49 |     we don't even need a DBAPI to be available.
50 | 
51 |     Calls to context.execute() here emit the given string to the
52 |     script output.
53 | 
54 |     """
55 |     url = config.get_main_option("sqlalchemy.url")
56 |     context.configure(
57 |         url=url,
58 |         target_metadata=target_metadata,
59 |         literal_binds=True,
60 |         dialect_opts={"paramstyle": "named"},
61 |     )
62 | 
63 |     with context.begin_transaction():
64 |         context.run_migrations()
65 | 
66 | 
67 | def run_migrations_online() -> None:
68 |     """Run migrations in 'online' mode.
69 | 
70 |     In this scenario we need to create an Engine
71 |     and associate a connection with the context.
72 | 
73 |     """
74 |     connectable = engine_from_config(
75 |         config.get_section(config.config_ini_section, {}),
76 |         prefix="sqlalchemy.",
77 |         poolclass=pool.NullPool,
78 |     )
79 | 
80 |     with connectable.connect() as connection:
81 |         context.configure(
82 |             connection=connection, target_metadata=target_metadata
83 |         )
84 | 
85 |         with context.begin_transaction():
86 |             context.run_migrations()
87 | 
88 | 
89 | if context.is_offline_mode():
90 |     run_migrations_offline()
91 | else:
92 |     run_migrations_online()
93 | 


--------------------------------------------------------------------------------
/alembic/script.py.mako:
--------------------------------------------------------------------------------
 1 | """${message}
 2 | 
 3 | Revision ID: ${up_revision}
 4 | Revises: ${down_revision | comma,n}
 5 | Create Date: ${create_date}
 6 | 
 7 | """
 8 | from alembic import op
 9 | import sqlalchemy as sa
10 | ${imports if imports else ""}
11 | 
12 | # revision identifiers, used by Alembic.
13 | revision = ${repr(up_revision)}
14 | down_revision = ${repr(down_revision)}
15 | branch_labels = ${repr(branch_labels)}
16 | depends_on = ${repr(depends_on)}
17 | 
18 | 
19 | def upgrade() -> None:
20 |     ${upgrades if upgrades else "pass"}
21 | 
22 | 
23 | def downgrade() -> None:
24 |     ${downgrades if downgrades else "pass"}
25 | 


--------------------------------------------------------------------------------
/alembic/versions/05e95b22503f_initial_migration.py:
--------------------------------------------------------------------------------
 1 | """Initial migration.
 2 | 
 3 | Revision ID: 05e95b22503f
 4 | Revises: 
 5 | Create Date: 2023-05-10 01:55:00.147864
 6 | 
 7 | """
 8 | from alembic import op
 9 | import sqlalchemy as sa
10 | 
11 | 
12 | # revision identifiers, used by Alembic.
13 | revision = '05e95b22503f'
14 | down_revision = None
15 | branch_labels = None
16 | depends_on = None
17 | 
18 | 
19 | def upgrade() -> None:
20 |     # ### commands auto generated by Alembic - please adjust! ###
21 |     op.drop_index('user_settings_created_at', table_name='user_settings')
22 |     op.drop_index('user_settings_user_id', table_name='user_settings')
23 |     op.create_index(op.f('ix_user_settings_createdAt'), 'user_settings', ['createdAt'], unique=False)
24 |     op.create_index(op.f('ix_user_settings_user_id'), 'user_settings', ['user_id'], unique=False)
25 |     # ### end Alembic commands ###
26 | 
27 | 
28 | def downgrade() -> None:
29 |     # ### commands auto generated by Alembic - please adjust! ###
30 |     op.drop_index(op.f('ix_user_settings_user_id'), table_name='user_settings')
31 |     op.drop_index(op.f('ix_user_settings_createdAt'), table_name='user_settings')
32 |     op.create_index('user_settings_user_id', 'user_settings', ['user_id'], unique=False)
33 |     op.create_index('user_settings_created_at', 'user_settings', ['createdAt'], unique=False)
34 |     # ### end Alembic commands ###
35 | 


--------------------------------------------------------------------------------
/alembic/versions/7a4408168dda_add_events_table.py:
--------------------------------------------------------------------------------
 1 | """Add events table.
 2 | 
 3 | Revision ID: 7a4408168dda
 4 | Revises: 05e95b22503f
 5 | Create Date: 2023-05-14 23:03:50.906104
 6 | 
 7 | """
 8 | from alembic import op
 9 | import sqlalchemy as sa
10 | from sqlalchemy.dialects.postgresql import JSONB
11 | 
12 | # revision identifiers, used by Alembic.
13 | revision = '7a4408168dda'
14 | down_revision = '05e95b22503f'
15 | branch_labels = None
16 | depends_on = None
17 | 
18 | def upgrade():
19 |     op.create_table(
20 |         'events',
21 |         sa.Column('id', sa.Integer, primary_key=True),
22 |         sa.Column('type', sa.String),
23 |         sa.Column('ref_table', sa.String),
24 |         sa.Column('ref_id', sa.Integer),
25 |         sa.Column('body', JSONB),
26 |         sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('NOW()')),
27 |     )
28 |     op.create_index('ix_events_type', 'events', ['type'])
29 |     op.create_index('ix_events_ref', 'events', ['ref_table', 'ref_id'])
30 | 
31 | def downgrade():
32 |     op.drop_index('ix_events_ref', table_name='events')
33 |     op.drop_index('ix_events_type', table_name='events')
34 |     op.drop_table('events')
35 | 
36 | 


--------------------------------------------------------------------------------
/alembic/versions/8a6746b2ce16_add_timers_table.py:
--------------------------------------------------------------------------------
 1 | """add timers table
 2 | 
 3 | Revision ID: 8a6746b2ce16
 4 | Revises: 7a4408168dda
 5 | Create Date: 2023-05-20 01:05:43.449156
 6 | 
 7 | """
 8 | from alembic import op
 9 | import sqlalchemy as sa
10 | from sqlalchemy.dialects.postgresql import JSONB
11 | 
12 | # revision identifiers, used by Alembic.
13 | revision = '8a6746b2ce16'
14 | down_revision = '7a4408168dda'
15 | branch_labels = None
16 | depends_on = None
17 | 
18 | def upgrade():
19 |     op.create_table(
20 |         'timers',
21 |         sa.Column('id', sa.Integer, primary_key=True),
22 |         sa.Column('chat_id', sa.String, index=True),
23 |         sa.Column('trigger_timestamp', sa.DateTime, index=True),
24 |         sa.Column('data', JSONB),
25 |         sa.Column('created_at', sa.DateTime),
26 |         sa.Column('updated_at', sa.DateTime)
27 |     )
28 | 
29 | def downgrade():
30 |     op.drop_table('timers')
31 | 
32 | 


--------------------------------------------------------------------------------
/requirements.pip:
--------------------------------------------------------------------------------
 1 | python-box==7.0.1
 2 | requests==2.29.0
 3 | tiktoken==0.3.3
 4 | SQLAlchemy==2.0.15
 5 | sqlalchemy-stubs==0.4
 6 | python-telegram-bot==20.3
 7 | backoff==2.2.1
 8 | openai==0.27.6
 9 | python-dotenv==1.0.0
10 | psycopg2==2.9.6
11 | posthog==3.0.1
12 | pydub==0.25.1
13 | pydub-stubs==0.25.1.0
14 | boto3==1.26.125
15 | numpy==1.24.3
16 | langchain==0.0.189
17 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/__init__.py


--------------------------------------------------------------------------------
/src/db_models.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | import os
  3 | 
  4 | import sqlalchemy
  5 | from sqlalchemy import create_engine, func
  6 | from sqlalchemy import Boolean, Column, DateTime, Index, Integer, JSON, String, Text, text, TypeDecorator
  7 | from sqlalchemy.dialects.postgresql import JSONB
  8 | from sqlalchemy.ext.declarative import declarative_base
  9 | from sqlalchemy.orm import sessionmaker
 10 | from sqlalchemy.engine.url import URL
 11 | 
 12 | # JSONB is not supported by SQLite, but is supported by PostgreSQL.
 13 | # DialectAdapter selects the right one is used per database type.
 14 | class DialectAdapter(TypeDecorator):
 15 |     impl = JSON
 16 | 
 17 |     def load_dialect_impl(self, dialect):
 18 |         if dialect.name == 'postgresql':
 19 |             return dialect.type_descriptor(JSONB())
 20 |         else:
 21 |             return dialect.type_descriptor(JSON())
 22 | 
 23 | ### Start of table definitions ###
 24 | 
 25 | Base = declarative_base()
 26 | metadata = Base.metadata
 27 | 
 28 | class Message(Base):
 29 |     __tablename__ = 'Messages'
 30 |     __table_args__ = (
 31 |         Index('index_on_messages_chat_id_message_id', 'chatId', 'messageId', unique=True),
 32 |         Index('index_on_messages_created_at_chat_id', 'createdAt', 'chatId')
 33 |     )
 34 | 
 35 |     id = Column(Integer, primary_key=True)
 36 |     source = Column(String(255))
 37 |     messageTimestamp = Column(DateTime(True))
 38 |     chatType = Column(String(255))
 39 |     chatId = Column(String(255))
 40 |     senderId = Column(String(255))
 41 |     isSentByMe = Column(Boolean)
 42 |     messageId = Column(String(255))
 43 |     replyToMessageId = Column(String(255))
 44 |     kind = Column(String(255))
 45 |     body = Column(Text)
 46 |     rawSource = Column(JSON)
 47 |     createdAt = Column(DateTime(True), nullable=False)
 48 |     updatedAt = Column(DateTime(True), nullable=False)
 49 | 
 50 | 
 51 | class SequelizeMeta(Base):
 52 |     __tablename__ = 'SequelizeMeta'
 53 | 
 54 |     name = Column(String(255), primary_key=True)
 55 | 
 56 | 
 57 | class UserSettings(Base):
 58 |     __tablename__ = 'user_settings'
 59 | 
 60 |     id = Column(Integer, primary_key=True)
 61 |     user_id = Column(String(255), nullable=False, index=True)
 62 |     settings = Column(DialectAdapter, nullable=False)
 63 |     version = Column(Integer, nullable=False)
 64 |     createdAt = Column(DateTime(True), nullable=False, index=True)
 65 |     updatedAt = Column(DateTime(True), nullable=False)
 66 | 
 67 | class Event(Base):
 68 |     __tablename__ = 'events'
 69 | 
 70 |     id = Column(Integer, primary_key=True)
 71 |     type = Column(String)
 72 |     ref_table = Column(String)
 73 |     ref_id = Column(Integer)
 74 |     body = Column(DialectAdapter)
 75 |     created_at = Column(DateTime(timezone=True), default=func.now(), nullable=False)
 76 | 
 77 |     __table_args__ = (
 78 |         sqlalchemy.Index('ix_events_type', 'type'),
 79 |         sqlalchemy.Index('ix_events_ref', 'ref_table', 'ref_id'),
 80 |     )
 81 | 
 82 | class Timer(Base):
 83 |     __tablename__ = 'timers'
 84 | 
 85 |     id = Column(Integer, primary_key=True)
 86 |     chat_id = Column(String, index=True)
 87 |     trigger_timestamp = Column(DateTime, index=True)
 88 |     data = Column(DialectAdapter)
 89 |     created_at = Column(DateTime)
 90 |     updated_at = Column(DateTime)
 91 | 
 92 | ### End of table definitions ###
 93 | 
 94 | # Set up the database connection
 95 | engine = create_engine(os.environ['DB_CONNECTION_STRING'])
 96 | 
 97 | # Create a session factory
 98 | Session = sessionmaker(bind=engine)
 99 | 
100 | # Register models
101 | Base.metadata.create_all(engine)
102 | 


--------------------------------------------------------------------------------
/src/infra/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/infra/__init__.py


--------------------------------------------------------------------------------
/src/infra/context.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | from typing import Any, Dict, Union
 3 | from infra import logger
 4 | 
 5 | 
 6 | class ThreadSafeCounter:
 7 |     def __init__(self):
 8 |         self._counter = 0
 9 |         self._lock = threading.Lock()
10 | 
11 |     def get_and_increment(self):
12 |         with self._lock:
13 |             val = self._counter
14 |             self._counter += 1
15 |             return val
16 | 
17 | # Usage
18 | counter = ThreadSafeCounter()
19 | 
20 | class Context(object):  
21 |     def __init__(self):
22 |         self.user_channel = None    # type: str
23 |         self.user_settings = {}     # type: Dict[str, Any]
24 | 
25 |         self.msg_count = counter.get_and_increment()
26 |         self.logger = logger.create_logging_context(self.msg_count)
27 | 
28 |         self.stats = {}
29 |     
30 |     def log(self, message:Any, *args:Any) -> None:
31 |         self.logger.log(message, args)
32 |         
33 |     def set_stat(self, key: str, value: Union[int, bool, float, str]):
34 |         self.stats[key] = value
35 | 


--------------------------------------------------------------------------------
/src/infra/logger.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | from logging.handlers import TimedRotatingFileHandler
 4 | 
 5 | # This code was migrated from node.js to Python using ChatGPT.
 6 | # Rotation is not necessarily working well.
 7 | max_file_size = os.environ.get("MAX_LOG_FILE_SIZE", 100 * 1024 * 1024)
 8 | max_log_files = int(os.environ.get("MAX_LOG_FILES", 50))
 9 | 
10 | log_formatter = logging.Formatter('%(asctime)s.%(msecs)03d %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
11 | 
12 | file_handler = TimedRotatingFileHandler('./logs/r1x.log', when='D', interval=1, backupCount=max_log_files)
13 | file_handler.setFormatter(log_formatter)
14 | file_handler.setLevel(logging.INFO)
15 | file_handler.suffix = '%Y-%m-%d'
16 | file_handler.extMatch = file_handler.extMatch
17 | 
18 | console_handler = logging.StreamHandler()
19 | console_handler.setFormatter(log_formatter)
20 | console_handler.setLevel(logging.INFO)
21 | 
22 | logger = logging.getLogger()
23 | logger.setLevel(logging.INFO)
24 | logger.addHandler(file_handler)
25 | logger.addHandler(console_handler)
26 | 
27 | class log_context():
28 |     def __init__(self, context):
29 |         self.context = context;
30 | 
31 |     def log(self, message, *args):
32 |         merged_message = f"[{self.context}] {message} {' '.join(str(arg) for arg in args)}"
33 |         logger.info(merged_message)
34 | 
35 | def create_logging_context(context):
36 |     return log_context(context)
37 | 


--------------------------------------------------------------------------------
/src/infra/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | import sys
 4 | from pathlib import Path
 5 | from dotenv import load_dotenv
 6 | from pydub import AudioSegment
 7 | 
 8 | from infra.context import Context
 9 | from infra.logger import logger
10 | 
11 | def download_stream_file(ctx:Context, url, path, headers=None):
12 |     # Create the directory if it doesn't exist
13 |     dir_path = Path(path).parent
14 |     os.makedirs(dir_path, exist_ok=True)
15 | 
16 |     is_successful = False  # Variable to track download status
17 | 
18 |     response = requests.get(url, headers=headers, stream=True)
19 | 
20 |     with open(path, 'wb') as file:
21 |         for chunk in response.iter_content(chunk_size=8192):
22 |             file.write(chunk)
23 | 
24 |     if response.status_code == 200:
25 |         ctx.log("downloadFile succeeded")
26 |         is_successful = True
27 | 
28 |     return is_successful
29 | 
30 | def convert_audio_to_mp3(ctx:Context, orig_file_path:str, mp3_file_path:str) -> str:
31 |     audio = AudioSegment.from_file(orig_file_path)
32 |     audio.export(mp3_file_path, format="mp3")
33 |     ctx.log("convertAudioToMp3 succeeded")
34 | 
35 |     return mp3_file_path
36 | 
37 | def load_env():
38 |     stage = os.environ.get("R1X_STAGE", "dev")
39 |     logger.info(f"Running R1X bot in {stage} mode...")
40 | 
41 |     load_dotenv(f"./.env.{stage}")
42 | 
43 | 
44 |     # If no database is provided, resort to a locally-hosted SQLite version.
45 |     # Typically used for testing.
46 |     if os.environ.get('DB_CONNECTION_STRING', '') == '':
47 |         os.environ['DB_CONNECTION_STRING'] = 'sqlite:///file::memory:?cache=shared'
48 | 
49 |     local_dev_required_envs = ['OPENAI_API_KEY', 'TELEGRAM_BOT_TOKEN', 'TELEGRAM_BOT_NAME', 'SERPER_API_KEY']
50 |     all_required_envs = local_dev_required_envs + ['AZURE_OPENAI_KEY', 'FACEBOOK_GRAPH_VERSION', 'WHATSAPP_BOT_TOKEN', 'WHATSAPP_PHONE_NUMBER_ID', 'WHATSAPP_PHONE_NUMBER', 'DB_CONNECTION_STRING', 'SQS_QUEUE_URL', 'DREAMSTUDIO_API_KEY', 'POSTHOG_API_KEY']
51 | 
52 |     required_envs = local_dev_required_envs if stage == 'dev-local' else all_required_envs
53 | 
54 |     # Ensure all reuqired environment variables are set up
55 |     for v in required_envs:
56 |         if os.environ.get(v, "") == "":
57 |             print(f"Environment variable {v} is undefined or an empty string. Pleas configure it via you .env.{stage} file.")
58 |             sys.exit(1)
59 | 


--------------------------------------------------------------------------------
/src/message_handler.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import json
  3 | import os
  4 | import pathlib
  5 | import tempfile
  6 | 
  7 | from posthog import Posthog
  8 | from sqlalchemy import desc
  9 | 
 10 | from typing import Any, Dict
 11 | 
 12 | from services.messengers import messenger_factory
 13 | from services.messengers.messenger import MessagingService
 14 | 
 15 | from services.open_ai.query_openai import get_chat_completion, get_chat_completion_with_tools, create_transcription
 16 | import db_models
 17 | from services.message_db import insert_message, get_message_history
 18 | import services.messengers as messengers
 19 | from infra.context import Context
 20 | 
 21 | posthog_client = None
 22 | if os.environ.get('POSTHOG_API_KEY', '') != '':
 23 |     posthog_client = Posthog(
 24 |         os.environ['POSTHOG_API_KEY'],
 25 |         host='https://app.posthog.com'
 26 |     )
 27 | 
 28 | def posthog_capture(distinct_id, event, properties):
 29 |     if posthog_client == None:
 30 |         return
 31 | 
 32 |     posthog_client.capture(distinct_id=distinct_id, event=event, properties=properties)
 33 | 
 34 | def get_user_settings(parsed_message) -> Dict[str, Any]: 
 35 |     user_id = f"{parsed_message.source}:{parsed_message.chatId}"
 36 |     session = db_models.Session()
 37 | 
 38 |     settings = session.query(db_models.UserSettings) \
 39 |                 .filter(db_models.UserSettings.user_id == user_id) \
 40 |                 .order_by(desc(db_models.UserSettings.createdAt)) \
 41 |                 .limit(1) \
 42 |                 .one_or_none()
 43 | 
 44 |     session.close()
 45 | 
 46 |     return getattr(settings, 'settings', {})
 47 | 
 48 | 
 49 | def handle_incoming_message(ctx: Context, event):
 50 |     in_flight = {"working": True}
 51 | 
 52 |     try:
 53 |         handle_incoming_message_core(ctx, event, in_flight)
 54 |     except Exception as error:
 55 |         ctx.log("Message processing failed: ",error)
 56 |         raise Exception("Message processing failed.")
 57 |     finally:
 58 |         in_flight["working"] = False
 59 | 
 60 | 
 61 | def handle_incoming_message_core(ctx:Context, event, in_flight):
 62 |     start = time.time()
 63 |     parsed_event = json.loads(event)
 64 |     ctx.log(parsed_event)
 65 |     messenger = messenger_factory.make_messenger_from_event(parsed_event)
 66 | 
 67 |     if messenger is None:
 68 |         return
 69 | 
 70 |     parse_message_result = messenger.parse_message(parsed_event["event"])
 71 |     parsed_message, file_info = parse_message_result
 72 | 
 73 |     messenger.set_status_read(ctx, parsed_message.messageId)
 74 | 
 75 |     ctx.user_settings = get_user_settings(parsed_message)
 76 |     ctx.user_channel = ctx.user_settings.get('channel', 'stable')
 77 | 
 78 |     if not ctx.user_settings.get('enabled', False):
 79 |         messenger.send_message(ctx, {
 80 |             "chat_id": parsed_message["chatId"],
 81 |             "kind": "text",
 82 |             "body": "Robot 1-X is no longer accessible for free. If you require access, please send a WhatsApp message to +16692221028.\n\nIf you simply require ChatGPT on your smartphone, you can use https://play.google.com/store/apps/details?id=com.openai.chatgpt (Android) or https://apps.apple.com/us/app/chatgpt/id6448311069 (iPhone)."
 83 |         })
 84 |         return
 85 | 
 86 |     is_typing = False
 87 | 
 88 |     if parsed_message.kind == "voice":
 89 |         is_typing = True
 90 |         handle_audio_message(ctx, messenger, parsed_message, file_info, in_flight)
 91 | 
 92 |         if parsed_message.isForwarded:
 93 |             return
 94 | 
 95 |     message = insert_message(ctx, parsed_message)
 96 | 
 97 |     if message.isSentByMe or message.body is None:
 98 |         return
 99 | 
100 |     if not messenger.is_message_for_me(message):
101 |         return
102 | 
103 |     if not is_typing:
104 |         messenger.set_typing(in_flight)
105 |         is_typing = True
106 | 
107 |     message_history = get_message_history(ctx, message)
108 |     ctx.log("message history pulled.")
109 | 
110 |     if len(message_history) <= 1:
111 |         ctx.log("sending intro message.")
112 |         send_intro_message(ctx, messenger, parsed_message)
113 |         return
114 | 
115 |     ctx.log("calling get_chat_completion...")
116 |     messenger_name = "WhatsApp" if parsed_event["source"] == "wa" else "Telegram"
117 |     completion = get_chat_completion_with_tools(ctx, messenger_name, message_history, direct=False)
118 | 
119 |     ctx.log({"completion": completion})
120 |     ctx.log("get_chat_completion done, result is ", completion.response)
121 | 
122 |     send_and_store(ctx, messenger, {
123 |         'chat_id': parsed_message.chatId,
124 |         'kind': "text",
125 |         'body': completion.response
126 |     })
127 | 
128 |     response_time_ms = int((time.time() - parsed_message.messageTimestamp) * 1000)
129 |     processing_time_ms = int((time.time() - start) * 1000)
130 |     completion_tokens_per_sec = completion.completionTokens / (processing_time_ms / 1000)
131 | 
132 |     ctx.set_stat('channel', ctx.user_channel)
133 |     ctx.set_stat('prompt_tokens', completion.promptTokens)
134 |     ctx.set_stat('completion_tokens', completion.completionTokens)
135 |     ctx.set_stat('completion_tokens_per_sec', completion_tokens_per_sec)
136 |     ctx.set_stat('total_tokens', completion.promptTokens + completion.completionTokens)
137 |     ctx.set_stat('response_time_ms', response_time_ms)
138 |     ctx.set_stat('processing_time_ms', processing_time_ms)
139 | 
140 |     ph_props = {
141 |             'senderId': parsed_message.senderId,
142 |     }
143 | 
144 |     ph_props.update(ctx.stats)
145 | 
146 |     posthog_capture(
147 |         distinct_id = f'{parsed_message.source}:{parsed_message.chatId}',
148 |         event = 'reply-sent',
149 |         properties = ph_props
150 |     )
151 | 
152 | def handle_audio_message(ctx, messenger, parsed_message, file_info, in_flight):
153 |     messenger.set_typing(in_flight)
154 | 
155 |     transcript = get_transcript(ctx, messenger, parsed_message, file_info)
156 |     text = "\N{SPEAKING HEAD IN SILHOUETTE}\N{MEMO}: " + transcript
157 | 
158 |     send_attrs = {
159 |         "chat_id": parsed_message.chatId,
160 |         "kind": "text",
161 |         "body": text,
162 |         "quote_id": parsed_message.messageId
163 |     }
164 | 
165 |     # Designed behavior:
166 |     #
167 |     # Forwarded messages: transcribe and exit
168 |     # Original messages: transcribe and respond
169 | 
170 |     if parsed_message.isForwarded:
171 |         parsed_message.body = "Please transcribe: <audio.mp3 file>"
172 |         insert_message(ctx, parsed_message)
173 |         send_and_store(ctx, messenger, send_attrs)
174 |     else:
175 |         parsed_message.body = transcript
176 |         # Use messenger.send_message directly, so transcribed reply is not stored in DB
177 |         messenger.send_message(ctx, send_attrs)
178 | 
179 |     posthog_capture(
180 |         distinct_id = f"{parsed_message.source}:{parsed_message.chatId}",
181 |         event = "message-transcribed",
182 |         properties = {
183 |             'sender_id': parsed_message.senderId,
184 |             'channel': ctx.user_channel,
185 |             'length_in_seconds': -1
186 |         }
187 |     )
188 | 
189 | 
190 | 
191 | def send_intro_message(ctx:Context, messenger, parsed_message):
192 |     intro_message_legal = """Robot 1-X at your service!
193 | 
194 | First, be aware that while I always do my best to help, I am not a professional doctor, psychologist, banker or otherwise.
195 | Some of my replies may provide incorrect information about people, locations and events.
196 | Always check my suggestions with a professional.
197 | 
198 | 
199 | If you're under 18, you must have your parents' permission before you continue talking to me!
200 | 
201 | Chatting with me means you agree to my Terms of Use (https://r1x.ai/terms-of-use) and Privacy policy (https://r1x.ai/privacy).
202 | Make sure to read them before continuing this chat."""
203 | 
204 |     intro_message_overview = """Here are some things you can ask me for:
205 | 
206 | - Write a bedtime story about Abigail and Jonathan, two superheroes who live next to a river.
207 | - Plan a 14-day road trip from Milan to Minsk. Include detailed suggestions about where to spend each day.
208 | - Rewrite the following text with spell-checking and punctuation: pleez send me all the docooments that is need for tomorrow flight im waiting for dem.
209 | - Please summarize the following text: <copy some text/email here>.
210 | 
211 | And, you can record a message instead of typing!
212 | 
213 | How can I help?"""
214 | 
215 |     send_and_store(ctx, messenger, {
216 |         "chat_id": parsed_message["chatId"],
217 |         "kind": "text",
218 |         "body": intro_message_legal
219 |     })
220 | 
221 |     send_and_store(ctx, messenger, {
222 |         "chat_id": parsed_message["chatId"],
223 |         "kind": "text",
224 |         "body": intro_message_overview
225 |     })
226 | 
227 | def get_transcript(ctx:Context, messenger, parsed_message, file_info):
228 |     mp3_file_path = None
229 | 
230 |     audio_root = pathlib.Path(tempfile.gettempdir()) / 'r1x' / 'audio'
231 |     audio_root.mkdir(exist_ok=True)
232 | 
233 |     with tempfile.TemporaryDirectory(dir=audio_root, ignore_cleanup_errors=True) as workdir:
234 |         mp3_file_path = messenger.get_voice_mp3_file(ctx, parsed_message, file_info, pathlib.Path(workdir))
235 |         transcription = create_transcription(ctx, mp3_file_path)
236 | 
237 |         return transcription
238 | 
239 | def send_and_store(ctx: Context, messenger: MessagingService, message_attributes):
240 |     response = messenger.send_message(ctx, message_attributes)
241 | 
242 |     if response:
243 |         insert_message(ctx, response)
244 | 


--------------------------------------------------------------------------------
/src/run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import json
 4 | import os
 5 | 
 6 | import boto3
 7 | from services.timers import alert_users
 8 | 
 9 | from infra import logger
10 | from infra.context import Context
11 | 
12 | import message_handler
13 | 
14 | import threading
15 | import traceback
16 | 
17 | from telegram import ForceReply, Update
18 | from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters
19 | 
20 | NUM_CONSUMERS = 10
21 | 
22 | QUEUE_URL = os.environ["SQS_QUEUE_URL"]
23 | 
24 | def process_message(message):
25 |     ctx = Context()
26 |     result = message_handler.handle_incoming_message(ctx, message['Body'])
27 |     ctx.log("Finished handling message")
28 | 
29 | def single_sqs_handler(queue):
30 |     while True:
31 |         try:
32 |             single_sqs_handler_core(queue)
33 |         except Exception as e:
34 |             logger.logger.error(f'Exception occurred; {e}; stack trace: ', traceback.format_exc())
35 | 
36 | def single_sqs_handler_core(queue):
37 |     response = queue.receive_message(QueueUrl=QUEUE_URL, MaxNumberOfMessages=1, WaitTimeSeconds=20)
38 | 
39 |     if not 'Messages' in response:
40 |        return
41 | 
42 |     # Single message each time
43 |     message = response['Messages'][0]
44 | 
45 |     process_message(message)
46 | 
47 |     queue.delete_message(QueueUrl=QUEUE_URL, ReceiptHandle=message['ReceiptHandle'])
48 | 
49 | def launch_sqs_threads():
50 |     logger.logger.info(f'Listening on {NUM_CONSUMERS} queues...')
51 | 
52 |     threads = []
53 |   
54 |     for i in range(NUM_CONSUMERS):
55 |         queue = boto3.client('sqs', region_name='eu-central-1')
56 |         thread = threading.Thread(target=single_sqs_handler, args=(queue,))
57 |         thread.start()
58 |         threads.append(thread)
59 | 
60 |     return threads
61 | 
62 | async def handle_local_incoming_telegram_message(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
63 |     message = { 'Body' : json.dumps({ 'source' : 'tg', 'event' : json.loads(update.to_json()) }) }
64 |     ctx = Context()
65 | 
66 |     process_message(message)
67 | 
68 | def launch_local_telegram_listener():
69 |     # Create the Application and pass it your bot's token.
70 |     application = Application.builder().token(os.environ['TELEGRAM_BOT_TOKEN']).build()
71 | 
72 |     # on non command i.e message - echo the message on Telegram
73 |     tg_filters = (filters.AUDIO | filters.TEXT | filters.VOICE) & ~filters.COMMAND
74 |     application.add_handler(MessageHandler(tg_filters, handle_local_incoming_telegram_message))
75 | 
76 |     # Run the bot until the user presses Ctrl-C
77 |     application.run_polling()
78 | 
79 |     # Threads to wait on; never reached
80 |     return []
81 | 
82 | def main():
83 |     threads = []
84 | 
85 |     timer_thread = threading.Thread(target=alert_users)
86 |     timer_thread.start()
87 |     threads.append(timer_thread)
88 | 
89 |     if os.environ['R1X_STAGE'] in ['dev', 'prod']:
90 |         threads = launch_sqs_threads()
91 |     else:
92 |         threads = launch_local_telegram_listener()
93 | 
94 |     for thread in threads:
95 |         thread.join()
96 | 
97 | if __name__ == "__main__":
98 |     main()
99 | 


--------------------------------------------------------------------------------
/src/services/message_db.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import and_, desc
 2 | import db_models
 3 | import datetime
 4 | 
 5 | from infra.context import Context
 6 | 
 7 | def insert_message(ctx:Context, attributes):
 8 |     source = attributes['source']
 9 |     message_timestamp = datetime.datetime.fromtimestamp(attributes['messageTimestamp'], tz=datetime.timezone.utc)
10 |     chat_type = attributes['chatType']
11 |     chat_id = attributes['chatId']
12 |     sender_id = attributes['senderId']
13 |     is_sent_by_me = attributes['isSentByMe']
14 |     message_id = attributes['messageId']
15 |     reply_to_message_id = attributes['replyToMessageId']
16 |     kind = attributes['kind']
17 |     body = attributes['body']
18 |     raw_source = attributes['rawSource']
19 | 
20 |     ctx.log('insertMessage attributes:', attributes)
21 | 
22 |     with db_models.Session() as session:
23 |         existing_message = session.query(db_models.Message).filter(and_(db_models.Message.chatId == chat_id, db_models.Message.messageId == message_id)).one_or_none()
24 | 
25 |         if existing_message:
26 |             return existing_message
27 | 
28 |         now = datetime.datetime.now()
29 | 
30 |         message = db_models.Message(
31 |             source=source,
32 |             messageTimestamp=message_timestamp,
33 |             chatType=chat_type,
34 |             chatId=chat_id,
35 |             senderId=sender_id,
36 |             isSentByMe=is_sent_by_me,
37 |             messageId=message_id,
38 |             replyToMessageId=reply_to_message_id,
39 |             kind=kind,
40 |             body=body,
41 |             rawSource=raw_source,
42 |             createdAt=now,
43 |             updatedAt=now
44 |         )
45 | 
46 |         session.add(message)
47 |         session.commit()
48 |         session.refresh(message)
49 | 
50 |         session.close()
51 | 
52 |     return message
53 | 
54 | def get_message_history(ctx:Context, message, options=None):
55 |     if options is None:
56 |         options = {}
57 | 
58 |     limit = options.get('limit', 20)
59 |     chat_id = message.chatId
60 |     message_timestamp = message.messageTimestamp
61 | 
62 |     with db_models.Session() as session:
63 |         messages = session.query(db_models.Message) \
64 |                    .filter(and_(db_models.Message.chatId == chat_id, db_models.Message.messageTimestamp <= message_timestamp)) \
65 |                    .order_by(desc(db_models.Message.createdAt)).limit(limit).all()
66 | 
67 |         session.close()
68 | 
69 |     return list(reversed(messages))
70 | 


--------------------------------------------------------------------------------
/src/services/messengers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/services/messengers/__init__.py


--------------------------------------------------------------------------------
/src/services/messengers/messenger.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Tuple
 3 | 
 4 | from box import Box
 5 | 
 6 | from infra.context import Context
 7 | 
 8 | 
 9 | class MessageKindE:
10 |     TEXT = 'text'
11 |     VOICE = 'voice'
12 |     AUDIO = 'audio'
13 | 
14 | 
15 | class MessagingService(ABC):
16 |     def __init__(self, chat_id: str):
17 |         super().__init__()
18 |         self.chat_id = chat_id
19 | 
20 |     @abstractmethod
21 |     def parse_message(self, message) -> Tuple[Box, Box]:
22 |         pass
23 | 
24 |     @abstractmethod
25 |     def send_message(self, ctx:Context, attributes) -> Box:
26 |         pass
27 |     
28 |     @abstractmethod
29 |     def send_contact(self, ctx:Context, name:str, handle:str):
30 |         pass
31 |     
32 |     @abstractmethod
33 |     def is_message_for_me(self, message) -> bool:
34 |         pass
35 | 
36 |     @abstractmethod
37 |     def set_typing(self, in_flight) ->None:
38 |         pass
39 | 
40 |     @abstractmethod
41 |     def get_voice_mp3_file(self, ctx:Context, parsed_message, file_info, work_dir) -> str:
42 |         pass
43 | 
44 |     @abstractmethod
45 |     def set_status_read(self, ctx:Context, message_id) -> None:
46 |         pass


--------------------------------------------------------------------------------
/src/services/messengers/messenger_factory.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Tuple, Optional, Type, Callable
 2 | from services.messengers.messenger import MessagingService
 3 | from services.messengers.tg import TelegramMessenger
 4 | from services.messengers.wa import WhatsappMessenger
 5 | 
 6 | messenger_by_type: Dict[str, Type[MessagingService]] = {'tg': TelegramMessenger, 'wa': WhatsappMessenger}
 7 | 
 8 | 
 9 | def make_messenger(messenger_chat_id: str) -> MessagingService:
10 |     messenger_str, chat_id = messenger_chat_id.split(":")
11 |     messenger = messenger_by_type[messenger_str](chat_id)
12 |     return messenger
13 | 
14 | 
15 | def _make_wa_messenger_from_event(event: Dict) -> Optional[MessagingService]:
16 |     entry_changes0 = event['event']['entry'][0]['changes'][0]['value']
17 |     if 'messages' not in entry_changes0:
18 |         # not a message event.
19 |         return None
20 | 
21 |     chat_id = entry_changes0['messages'][0]['from']
22 |     messenger = messenger_by_type[event['source']](chat_id)
23 |     return messenger
24 | 
25 | 
26 | def _make_tg_messenger_from_event(event: Dict) -> MessagingService:
27 |     chat_id = str(event['event']['message']['chat']['id'])
28 |     messenger = messenger_by_type[event['source']](chat_id)
29 |     return messenger
30 | 
31 | 
32 | messenger_factory_by_type: Dict[str, Callable] = {'tg': _make_tg_messenger_from_event, 'wa': _make_wa_messenger_from_event}
33 | 
34 | 
35 | def make_messenger_from_event(event: Dict) -> Optional[MessagingService]:
36 |     messenger = messenger_factory_by_type[event['source']](event)
37 |     return messenger
38 | 


--------------------------------------------------------------------------------
/src/services/messengers/tg.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | import tempfile
  4 | from typing import Optional
  5 | import requests
  6 | 
  7 | from infra.context import Context
  8 | from services.messengers.messenger import MessageKindE, MessagingService
  9 | from infra import utils
 10 | from box import Box
 11 | 
 12 | import threading
 13 | 
 14 | TELEGRAM_SENDER_ID = os.environ['TELEGRAM_BOT_TOKEN'].split(':')[0]
 15 | 
 16 | class TelegramMessenger(MessagingService):
 17 |     
 18 |     def _get_message_kind(self, message) -> Optional[str]:
 19 |         if 'text' in message:
 20 |             return MessageKindE.TEXT
 21 |         elif 'voice' in message:
 22 |             return MessageKindE.VOICE
 23 |         elif 'audio' in message:
 24 |             return MessageKindE.AUDIO
 25 |         return None
 26 | 
 27 |     def parse_message(self, message):
 28 |         message = message['message']
 29 | 
 30 |         source = "tg"
 31 |         message_timestamp = message['date']
 32 |         chat_type = message['chat']['type']
 33 | 
 34 |         sender_id = str(message['from']['id'])
 35 |         is_sent_by_me = message['from']['id'] == int(TELEGRAM_SENDER_ID)
 36 |         is_forwarded = message.get('forward_from', None) != None
 37 |         messageId = str(message['message_id'])
 38 |         reply_to_message_id = message['reply_to_message']['message_id'] if 'reply_to_message' in message else None
 39 |         kind = self._get_message_kind(message)
 40 |         body = message['text'] if 'text' in message else None
 41 |         fileId = message['voice']['file_id'] if kind == MessageKindE.VOICE else None
 42 |         fileUniqueId = message['voice']['file_unique_id'] if kind == MessageKindE.VOICE else None
 43 | 
 44 |         return (
 45 |             Box({
 46 |                 'source': source,
 47 |                 'messageTimestamp': message_timestamp,
 48 |                 'chatType': chat_type,
 49 |                 'chatId': self.chat_id,
 50 |                 'senderId': sender_id,
 51 |                 'isSentByMe': is_sent_by_me,
 52 |                 'isForwarded': is_forwarded,
 53 |                 'messageId': messageId,
 54 |                 'replyToMessageId': reply_to_message_id,
 55 |                 'kind': kind,
 56 |                 'body': body,
 57 |                 'rawSource': message
 58 |             }),
 59 |             Box({
 60 |                 'fileId': fileId,
 61 |                 'fileUniqueId': fileUniqueId
 62 |             })
 63 |         )
 64 | 
 65 |     def send_message(self, ctx:Context, attributes): 
 66 |         quote_id = attributes.get('quote_id')
 67 |         kind = attributes.get('kind')
 68 |         body = attributes.get('body')
 69 | 
 70 |         if kind != "text":
 71 |             return
 72 | 
 73 |         args = {'chat_id': self.chat_id, 'text': body}
 74 |         if quote_id:
 75 |             args['reply_to_message_id'] = quote_id
 76 |             args['allow_sending_without_reply'] = True
 77 | 
 78 |         response = requests.post(
 79 |             f'https://api.telegram.org/bot{os.environ["TELEGRAM_BOT_TOKEN"]}/sendMessage',
 80 |             json=args
 81 |         ).json()
 82 | 
 83 |         if not response['ok']:
 84 |             return None
 85 |         
 86 |         message = {'message': response['result']}
 87 |         parsed_message, file_info = self.parse_message(message)
 88 | 
 89 |         return parsed_message
 90 |     
 91 |     def send_contact(self, ctx: Context, name:str, handle:str):
 92 |         args = {'chat_id': self.chat_id, 'text': f'https://t.me/{handle}'}
 93 |         response = requests.post(
 94 |             f'https://api.telegram.org/bot{os.environ["TELEGRAM_BOT_TOKEN"]}/sendMessage',
 95 |             json=args
 96 |         ).json()
 97 | 
 98 |         return response
 99 | 
100 | 
101 |     def is_message_for_me(self, msg) -> bool:
102 |         if msg.chatType == "private":
103 |             return True
104 | 
105 |         if msg.body.startswith(f'@{os.environ["TELEGRAM_BOT_NAME"]}'):
106 |             return True
107 | 
108 |         if 'reply_to_message' in msg.rawSource and msg.rawSource['reply_to_message']['from']['id'] == int(TELEGRAM_SENDER_ID):
109 |             return True
110 | 
111 |         return False
112 |     
113 |     
114 |     def get_voice_mp3_file(self, ctx:Context, parsed_message, file_info, work_dir) -> str:
115 |         ctx.log(f"getVoiceMp3File: {parsed_message}, {file_info}, {work_dir}")
116 |         url = self._get_download_url(ctx, file_info.fileId)
117 |         orig_file_path, mp3_file_path = self._get_audio_file_paths(ctx, parsed_message.chatId, file_info, work_dir)
118 | 
119 |         utils.download_stream_file(ctx, url, orig_file_path)
120 |         utils.convert_audio_to_mp3(ctx, orig_file_path, mp3_file_path)
121 | 
122 |         return mp3_file_path
123 | 
124 |     def _get_download_url(self, ctx:Context, file_id):
125 |         args = {"file_id": file_id}
126 | 
127 |         response = requests.post(
128 |             f"https://api.telegram.org/bot{os.environ['TELEGRAM_BOT_TOKEN']}/getFile",
129 |             json=args,
130 |         )
131 |         data = response.json()
132 | 
133 |         if not data["ok"]:
134 |             ctx.log(f"getDownloadUrl failed. response={data}")
135 | 
136 |         remote_file_path = data["result"]["file_path"]
137 |         download_url = f"https://api.telegram.org/file/bot{os.environ['TELEGRAM_BOT_TOKEN']}/{remote_file_path}"
138 | 
139 |         ctx.log(f"getDownloadUrl: downloadUrl={download_url}")
140 |         return download_url
141 | 
142 |     def _get_audio_file_paths(self, ctx:Context, chat_id, file_info, work_dir):
143 |         orig_file_path = work_dir / 'audio.orig'
144 |         mp3_file_path = work_dir / 'audio.mp3'
145 | 
146 |         ctx.log(f"getAudioFilePaths: origFilePath={orig_file_path}, mp3FilePath={mp3_file_path}")
147 | 
148 |         return orig_file_path, mp3_file_path
149 | 
150 |     def set_typing(self, in_flight):
151 |         if not in_flight["working"]:
152 |             return
153 | 
154 |         requests.post(
155 |             f"https://api.telegram.org/bot{os.environ['TELEGRAM_BOT_TOKEN']}/sendChatAction",
156 |             json={"chat_id": self.chat_id, "action": "typing"},
157 |         )
158 | 
159 |         base_timeout = 6
160 |         extra_timeout = random.randint(0, 1500)
161 |         timeout = base_timeout + (extra_timeout / 1000)
162 | 
163 |         timer = threading.Timer(timeout, self.set_typing, args=(in_flight,))
164 |         timer.start()
165 |     
166 |     def set_status_read(self, ctx: Context, message_id) -> None:
167 |         return
168 | 


--------------------------------------------------------------------------------
/src/services/messengers/wa.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict
  3 | import requests
  4 | from services.messengers.messenger import MessageKindE, MessagingService
  5 | from infra import utils
  6 | from box import Box
  7 | import time
  8 | 
  9 | from infra.context import Context
 10 | 
 11 | class EventKindE:
 12 |     STATUS_UPDATE = 'status_update'
 13 |     MESSAGE = 'message'
 14 |     
 15 | 
 16 | class WhatsappMessenger(MessagingService):
 17 |     def _get_event_kind(self, value):
 18 |         if 'statuses' in value:
 19 |             return EventKindE.STATUS_UPDATE
 20 |         if 'messages' in value:
 21 |             return EventKindE.MESSAGE
 22 |         return None
 23 | 
 24 |     def _get_message_kind(self, value) -> str:
 25 |         if value['type'] == 'audio':
 26 |             return MessageKindE.VOICE
 27 |         return value['type']
 28 | 
 29 |     def parse_message(self, message):
 30 |         source = "wa"
 31 |         event_kind = self._get_event_kind(message['entry'][0]['changes'][0]['value'])
 32 |         if event_kind != EventKindE.MESSAGE:
 33 |             return None
 34 | 
 35 |         message0 = message['entry'][0]['changes'][0]['value']['messages'][0]
 36 | 
 37 |         kind = self._get_message_kind(message0)
 38 |         message_timestamp = float(message0['timestamp'])
 39 |         sender_id = message0['from']
 40 |         chat_type = "private"
 41 |         is_sent_by_me = sender_id == os.environ['WHATSAPP_PHONE_NUMBER']
 42 |         is_forwarded = (message0.get('context', {}).get('forwarded', None) != None)
 43 |         message_id = message0['id']
 44 |         reply_to_message_id = message0.get('context', {}).get('id')
 45 | 
 46 |         if kind == MessageKindE.TEXT:
 47 |             body = message0['text']['body']
 48 |         else:
 49 |             body = None
 50 | 
 51 |         if kind == MessageKindE.VOICE:
 52 |             file_id = message0['audio']['id']
 53 |         else:
 54 |             file_id = None
 55 | 
 56 |         file_unique_id = None
 57 | 
 58 |         return [Box({
 59 |             "source": source,
 60 |             "messageTimestamp": message_timestamp,
 61 |             "chatType": chat_type,
 62 |             "chatId": self.chat_id,
 63 |             "senderId": sender_id,
 64 |             "isSentByMe": is_sent_by_me,
 65 |             "isForwarded" : is_forwarded,
 66 |             "messageId": message_id,
 67 |             "replyToMessageId": reply_to_message_id,
 68 |             "kind": kind,
 69 |             "body": body,
 70 |             "rawSource": message
 71 |         }), Box({
 72 |             "fileId": file_id,
 73 |             "fileUniqueId": file_unique_id
 74 |         })]
 75 | 
 76 |     def _get_bot_generated_message(self, ctx:Context, send_message_response, attributes):
 77 |         quote_id = attributes.get('quote_id')
 78 |         kind = attributes.get('kind')
 79 |         body = attributes.get('body')
 80 | 
 81 |         message = {
 82 |             "entry": [
 83 |                 {
 84 |                     "changes": [
 85 |                         {
 86 |                             "value": {
 87 |                                 "messages": [
 88 |                                     {
 89 |                                         "timestamp": (int(time.time() * 1000) / 1e3),
 90 |                                         "from": os.environ['WHATSAPP_PHONE_NUMBER'],
 91 |                                         "id": send_message_response['messages'][0]['id'],
 92 |                                         "type": kind,
 93 |                                         "text": {
 94 |                                             "body": body
 95 |                                         }
 96 |                                     }
 97 |                                 ]
 98 |                             }
 99 |                         }
100 |                     ]
101 |                 }
102 |             ]
103 |         }
104 | 
105 |         return message
106 | 
107 |     def send_message(self, ctx:Context, attributes):
108 |         quote_id = attributes.get('quote_id')
109 |         kind = attributes.get('kind')
110 |         body = attributes.get('body')
111 | 
112 |         if kind != "text":
113 |             return
114 | 
115 |         if len(body) > 4000:
116 |             ctx.log('send_message: message body too long, %d > 4000' % len(body))
117 |             body = body[0:3999]
118 | 
119 |         headers = {
120 |             "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}",
121 |             "Content-Type": "application/json"
122 |         }
123 | 
124 |         args = {
125 |             "messaging_product": "whatsapp",
126 |             "recipient_type": "individual",
127 |             "to": self.chat_id,
128 |             "type": "text",
129 |             "text": {
130 |                 "preview_url": False,
131 |                 "body": body
132 |             }
133 |         }
134 | 
135 |         if quote_id:
136 |             args["context"] = {"message_id": quote_id}
137 | 
138 |         response = self._post_message_request(ctx, headers, args)
139 | 
140 |         if response == None:
141 |             return None
142 | 
143 |         message = self._get_bot_generated_message(ctx, response.json(), attributes)
144 |         parsed_message, _ = self.parse_message(message)
145 |         parsed_message.chatId = self.chat_id
146 | 
147 |         return parsed_message
148 | 
149 |     def _post_message_request(self, ctx:Context, headers:Dict[str,str], args):
150 |         try:
151 |             response = requests.post(
152 |                 f"https://graph.facebook.com/{os.environ['FACEBOOK_GRAPH_VERSION']}/{os.environ['WHATSAPP_PHONE_NUMBER_ID']}/messages",
153 |                 json=args,
154 |                 headers=headers
155 |             )
156 |             response.raise_for_status()
157 |         except requests.exceptions.RequestException as error:
158 |             ctx.log(f"post_message_request: exception. error={error}")
159 |             raise error
160 |         return response
161 |     
162 |     def send_contact(self, ctx: Context, name:str, handle:str):
163 |         headers = {
164 |             "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}",
165 |             "Content-Type": "application/json"
166 |         }
167 |         contact_args = {
168 |             "messaging_product": "whatsapp",
169 |             "recipient_type": "individual",
170 |             "to": self.chat_id,
171 |             "type": "contacts",
172 |             "contacts": [
173 |                 {
174 |                     "addresses": [],
175 |                     "emails": [],
176 |                     "name": {
177 |                         "first_name": name,
178 |                         "formatted_name": name,
179 |                         "last_name": ""
180 |                     },
181 |                     "org": {},
182 |                     "phones": [
183 |                         {
184 |                             "phone": f"+{handle}",
185 |                             "type": "HOME",
186 |                             "wa_id": handle
187 |                         }
188 |                     ],
189 |                     "urls": []
190 |                 }
191 |             ]
192 |         }
193 |         response = self._post_message_request(ctx,headers,contact_args)
194 |         return response.json()     
195 | 
196 |     def is_message_for_me(self, msg) -> bool:
197 |         if msg.chatType == "private":
198 |             return True
199 | 
200 |         return False
201 | 
202 |     def get_voice_mp3_file(self, ctx:Context, parsed_message, file_info, work_dir) -> str:
203 |         ctx.log(f"getVoiceMp3File: {parsed_message}, {file_info}, {work_dir}")
204 |         url = self._get_download_url(ctx, file_info.fileId)
205 |         orig_file_path, mp3_file_path = self._get_audio_file_paths(ctx, parsed_message.chatId, file_info, work_dir)
206 | 
207 |         headers = {
208 |             "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}",
209 |         }
210 | 
211 |         utils.download_stream_file(ctx, url, orig_file_path, headers)
212 |         utils.convert_audio_to_mp3(ctx, orig_file_path, mp3_file_path)
213 | 
214 |         return mp3_file_path
215 | 
216 |     def _get_download_url(self, ctx:Context, file_id):
217 |         ctx.log(f"getDownloadUrl: {file_id}")
218 |         headers = {
219 |             "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}",
220 |         }
221 | 
222 |         try:
223 |             response = requests.get(
224 |                 f"https://graph.facebook.com/{os.environ['FACEBOOK_GRAPH_VERSION']}/{file_id}?phone_number_id={os.environ['WHATSAPP_PHONE_NUMBER_ID']}",
225 |                 headers=headers
226 |             )
227 |             response.raise_for_status()
228 |         except requests.exceptions.RequestException as error:
229 |             ctx.log(f"getDownloadUrl: exception. error={error}")
230 |             raise error
231 | 
232 |         download_url = response.json()['url']
233 | 
234 |         ctx.log(f"getDownloadUrl: downloadUrl={download_url}")
235 |         return download_url
236 | 
237 |     def _get_audio_file_paths(self, ctx:Context, chat_id, file_info, work_dir):
238 |         orig_file_path = work_dir / 'audio.orig'
239 |         mp3_file_path = work_dir / 'audio.mp3'
240 | 
241 |         ctx.log(f"getAudioFilePaths: orgFilePath={orig_file_path}, mp3FilePath={mp3_file_path}")
242 | 
243 |         return orig_file_path, mp3_file_path
244 | 
245 | 
246 |     def set_typing(self, in_flight):
247 |         # TODO: igors - can't find WA API for typing indication.
248 |         pass
249 | 
250 | 
251 |     def set_status_read(self, ctx:Context, message_id):
252 |         ctx.log("setStatusRead")
253 |         headers = {
254 |             "Authorization": f"Bearer {os.environ['WHATSAPP_BOT_TOKEN']}",
255 |             "Content-Type": "application/json",
256 |         }
257 | 
258 |         args = {
259 |             "messaging_product": "whatsapp",
260 |             "status": "read",
261 |             "message_id": message_id,
262 |         }
263 | 
264 |         try:
265 |             response = requests.post(
266 |                 f"https://graph.facebook.com/{os.environ['FACEBOOK_GRAPH_VERSION']}/{os.environ['WHATSAPP_PHONE_NUMBER_ID']}/messages",
267 |                 json=args,
268 |                 headers=headers
269 |             )
270 |             response.raise_for_status()
271 |         except requests.exceptions.RequestException as error:
272 |             ctx.log(f"setStatusRead: exception. error={error}")
273 |             return
274 | 
275 |         if response.json().get('success') != True:
276 |             ctx.log(f"setStatusRead: response is wrong. Compared field {response.json().get('success')}. Full response {response}")
277 | 


--------------------------------------------------------------------------------
/src/services/open_ai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/src/services/open_ai/__init__.py


--------------------------------------------------------------------------------
/src/services/open_ai/query_openai.py:
--------------------------------------------------------------------------------
  1 | import backoff
  2 | import json
  3 | import os
  4 | import openai
  5 | import time
  6 | import re
  7 | import requests
  8 | import traceback
  9 | from typing import Dict
 10 | 
 11 | from box import Box
 12 | from services.timers import invoke_alert_tool
 13 | 
 14 | 
 15 | from services.token_prediction import token_predictor
 16 | from infra.context import Context
 17 | from langchain.utilities import google_serper
 18 | 
 19 | OPENAI_SPEECH_TO_TEXT_MODEL = 'whisper-1'
 20 | 
 21 | openai.api_key = os.environ['OPENAI_API_KEY']
 22 | 
 23 | 
 24 | def deep_clone(o):
 25 |     return json.loads(json.dumps(o))
 26 | 
 27 | 
 28 | def convert_message_to_chat_format(message):
 29 |     converted_message = {
 30 |         "role": "assistant" if message.isSentByMe else "user",
 31 |         "content": message.body,
 32 |     }
 33 |     return converted_message
 34 | 
 35 | 
 36 | def get_system_message(ctx:Context, messenger_name):
 37 |     current_date = time.strftime("%B %d, %Y", time.gmtime()) 
 38 | 
 39 |     system_message = {
 40 |         "role": "system",
 41 |         "content": f"""You are Robot 1-X (R1X), a helpful, cheerful assistant developed by the Planet Express team and integrated into a {messenger_name} chat.
 42 | You are based on GPT-3.5 technology. More information about R1X is available at https://r1x.ai.
 43 | Today is {current_date}.
 44 | 
 45 | If Robot 1-X does not know, it truthfully says so.
 46 | If user asks for information that Robot 1-X does not have but can estimate, Robot 1-X will provide the estimate, while mentioning it is an estimate and not a fact."""
 47 |     }
 48 | 
 49 |     return system_message
 50 | 
 51 | 
 52 | def db_messages2messages(messages):
 53 |     parsed_messages = []
 54 | 
 55 |     for message in messages:
 56 |         if message.body is None:
 57 |             continue
 58 |         parsed_messages.append(convert_message_to_chat_format(message))
 59 | 
 60 |     return parsed_messages
 61 | 
 62 | 
 63 | def get_limited_message_history(ctx, messages, prompt_template):
 64 |     soft_token_limit = 2048
 65 |     hard_token_limit = 4000
 66 | 
 67 |     messages_upto_max_tokens = token_predictor.get_messages_upto_max_tokens(
 68 |         ctx, prompt_template, messages, soft_token_limit, hard_token_limit
 69 |     )
 70 | 
 71 |     if len(messages_upto_max_tokens) == 0:
 72 |         return []
 73 | 
 74 |     if messages_upto_max_tokens[0]["role"] == "assistant":
 75 |         messages_upto_max_tokens.pop(0)
 76 | 
 77 |     merged_messages = []
 78 |     prev_role = None
 79 | 
 80 |     for message in messages_upto_max_tokens:
 81 |         if message["role"] == 'assistant':
 82 |             message["content"] = message["content"].removeprefix("\N{LEFT-POINTING MAGNIFYING GLASS}: ")
 83 | 
 84 |         if message["role"] == prev_role:
 85 |             merged_messages[-1]["content"] += f"\n{message['content']}"
 86 |         else:
 87 |             merged_messages.append(message)
 88 | 
 89 |         prev_role = message["role"]
 90 | 
 91 |     return merged_messages
 92 | 
 93 | 
 94 | def get_chat_completion(ctx:Context, messenger_name, messages, direct):
 95 |     parsed_messages = deep_clone(messages) if direct else db_messages2messages(messages)
 96 | 
 97 |     system_message = get_system_message(ctx, messenger_name)
 98 |     messages_upto_max_tokens = get_limited_message_history(
 99 |         ctx, parsed_messages, system_message
100 |     )
101 | 
102 |     return get_chat_completion_core(ctx, messenger_name, messages_upto_max_tokens)
103 | 
104 | @backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_tries=3)
105 | def get_chat_completion_core(ctx, messenger_name, messages, model=None):
106 |     if not model:
107 |         model = "gpt-4" if ctx.user_channel == "canary" else "gpt-3.5-turbo"
108 | 
109 |     try:
110 |         ctx.log("Messages: ", messages);
111 |         ctx.log("invoking completion request.")
112 | 
113 |         completion = chat_completion_create_wrap(ctx, model, messages)
114 | 
115 |         ctx.log("getChatCompletionCore response: ", completion['choices'][0]['message']['content'])
116 | 
117 |         return Box({
118 |             "response": completion['choices'][0]['message']['content'],
119 |             "promptTokens": completion['usage']['prompt_tokens'],
120 |             "completionTokens": completion['usage']['completion_tokens']
121 |         })
122 |     except Exception as e:
123 |         if hasattr(e, "response"):
124 |             ctx.log(f"error: e.response={e.response}")
125 |         else:
126 |             ctx.log("error: e={e}", e)
127 | 
128 |         ctx.log("error generating completion from OpenAI.")
129 |         raise e
130 | 
131 | 
132 | def get_prep_message(ctx : Context, messenger, is_final : bool) -> Dict[str, str]:
133 |     current_date = time.strftime("%B %d, %Y", time.gmtime())
134 | 
135 |     is_debug_prompt = False
136 | 
137 |     gpt_ver = 'GPT-4' if ctx.user_channel == 'canary' else 'GPT-3.5'
138 | 
139 |     prep_message_stable = {
140 |         "role" : "user",
141 |         "content" : f"""You are Robot 1-X (R1X), a helpful, cheerful assistant developed by the Planet Express team and integrated into a {messenger} chat.
142 | You are based on {gpt_ver} technology. More information about you is available at https://r1x.ai.
143 | 
144 | I will provide a CHAT between R1X and a human, wrapped with tags: <yair1xigor>CHAT</yair1xigor>. Last speaker is the user.
145 | 
146 | Your task is to provide R1X's answer.
147 | 
148 | You can invoke one of the following tools to augment your knowledge before replying:
149 | 
150 | ALERT: sets a reminder for the user. TOOL_INPUT=(seconds, text), where seconds is relative time in seconds from request to when alert should be provided. answer with an error message if the user provides an absolute time.
151 | SEARCH: performs a Google search and returns key results. Use this tool to fetch real-time, up-to-date information about world events. Its data is more reliable than your existing knowledge. TOOL_INPUT=search prompt.
152 | WEATHER: per-location 3-day weather forecast, at day granularity. It does not provide a finer-grained forecast. TOOL_INPUT=<City, Country>, both in English. TOOL_INPUT should always be a well-defined settlement and country/state. IMPORTANT: If you believe the right value for TOOL_INPUT is unknown/my location/similar, do not ask for the tool to be invoked and instead use the ANSWER format to ask the user for location information.
153 | 
154 | For invoking a tool, provide your reply wrapped in <yair1xigoresponse>REPLY</yair1xigoresponse> tags, where REPLY is in JSON format with the following fields: TOOL, TOOL_INPUT.
155 | Examples:
156 | 
157 | <yair1xigoresponse>{{ "TOOL" : "ALERT", "TOOL_INPUT" : (240, "Do the dishes") }}</yair1xigoresponse>
158 | <yair1xigoresponse>{{ "TOOL" : "SEARCH", "TOOL_INPUT" : "Who is the current UK PM?" }}</yair1xigoresponse>
159 | <yair1xigoresponse>{{ "TOOL" : "WEATHER", "TOOL_INPUT" : "Tel Aviv, Israel" }}</yair1xigoresponse>
160 | 
161 | Use these exact formats, and do not deviate.
162 | 
163 | Otherwise, provide your final reply wrapped in <yair1xigoresponse>REPLY</yair1xigoresponse> tags in a JSON format, with the following fields: ANSWER.
164 | Example:
165 | 
166 | <yair1xigoresponse>{{ "ANSWER" : "Current UK PM is Rishi Sunak" }}</yair1xigoresponse>
167 | 
168 | When providing a final answer, use this exact format, and do not deviate.
169 | IMPORTANT: ALWAYS wrap your final answer with <yair1xigoresponse> tags, and in JSON format.
170 | 
171 | Today's date is {current_date}.
172 | For up-to-date information about people, stocks and world events, ALWAYS use one of the tools available to you and DO NOT provide an answer.
173 | For fiction requests, use your knowledge and creativity to answer.
174 | If human request has no context of time, assume he is referring to current time period.
175 | All tools provided have real-time access to the internet; do not reply that you have no access to the internet, unless you have attempted to invoke the SEARCH tool first. Additionally, do not invoke a tool if the required TOOL_INPUT is unknown, vague, or not provided. Always follow the IMPORTANT note in the tool description.
176 | If you have missing data and ONLY if you cannot use the tools provided to fetch it, try to estimate; in these cases, let the user know your answer is an estimate.
177 | 
178 | Don't provide your response until you made sure it is valid, and meets all prerequisites laid out for tool invocation.
179 | 
180 | WHEN PROVIDING A FINAL ANSWER TO THE USER, NEVER MENTION THE SEARCH AND WEATHER TOOLS DIRECTLY, AND DO NOT SUGGEST THAT THE USER UTILIZES THEM.
181 | 
182 | Your thought process should follow the next steps {'audibly stating the CONCLUSION for each step number without quoting it:' if is_debug_prompt else 'silently:'}
183 | 1. Understand the human's request and formulate it as a self-contained question.
184 | 2. Decide which tool should be invoked can provide the most information, and with what input. Decide all prerequisites for the tool and show how each is met.
185 | 3. Formulate the tool invocation request, or answer, in JSON format as detailed above. IMPORTANT: THIS PART MUST BE DELIVERED IN A SINGLE LINE. DO NOT USE MULTILINE SYNTAX.
186 | 
187 | IMPORTANT: Make sure to focus on the most recent request from the user, even if it is a repeated one.""" }
188 | 
189 |     prep_message_final = {
190 |         "role" : "user",
191 |         "content" : f"""You are Robot 1-X (R1X), a helpful, cheerful assistant developed by the Planet Express team and integrated into a {messenger} chat.
192 | You are based on {gpt_ver} technology. More information about you is available at https://r1x.ai.
193 | 
194 | I will provide a CHAT between R1X and a human, wrapped with tags: <yair1xigor>CHAT</yair1xigor>. Last speaker is the user.
195 | I will also provide you with data generated by external tool invocations, which you can rely on for your answers; this data will be wrapped with tags, as such: <r1xdata>DATA</r1xdata>.
196 | 
197 | DO NOT CONTRADICT OR DOUBT THAT DATA. IT SUPERSEDES ANY OTHER DATA YOU HAVE, AND IS UP TO DATE AS OF TODAY.
198 | DO NOT MENTION TO THE USER THIS DATA WAS PROVIDED TO YOU IN ANY WAY.
199 | NEVER MENTION TO THE USER THE REPLY IS ACCORDING TO A SEARCH.
200 | DO NOT START YOUR ANSWER WITH A MAGNIFYING GLASS EMOJI; THAT WILL BE PROVIDED TO THE USER SEPARATELY, AS NEEDED.
201 | 
202 | Your task is to provide R1X's answer.
203 | 
204 | Today's date is {current_date}.
205 | You are trained with knowledge until September 2021.
206 | If you have missing data, try to estimate, and let the user know your answer is an estimate.
207 | 
208 | Your thought process should follow the next steps {'audibly stating the CONCLUSION for each step number without quoting it:' if is_debug_prompt else 'silently:'}
209 | 1. Understand the human's request and formulate it as a self-contained question.
210 | 2. Integrate all data provided to you with your current knowledge and formulate a response.
211 | 
212 | IMPORTANT: Make sure to focus on the most recent request from the user, even if it is a repeated one.""" }
213 | 
214 |     return prep_message_final if is_final else prep_message_stable
215 | 
216 | prep_reply_message = {"role": "assistant", "content": "Understood. Please provide me with the chat between R1X and the human."}
217 | 
218 | import datetime
219 | 
220 | def get_chat_completion_with_tools(ctx:Context, messenger_name, messages, direct):
221 |     try:
222 |         ctx.log("Starting getChatCompletionWithTools.")
223 | 
224 |         parsed_messages = deep_clone(messages) if direct else db_messages2messages(messages)
225 |         ctx.log({"messages": parsed_messages})
226 | 
227 |         prev_responses = []
228 | 
229 |         #system_message = get_system_message(ctx, messenger_name)
230 |         system_message = None
231 |         history = get_limited_message_history(ctx, parsed_messages, system_message)
232 | 
233 |         prompt_tokens_total = 0
234 |         completion_tokens_total = 0
235 | 
236 |         max_iterations = 2
237 |         successful_iterations = 0
238 | 
239 |         ctx.set_stat('tools-flow:tool-invocations', successful_iterations)
240 | 
241 |         for i in range(max_iterations):
242 |             ctx.log(f"Invoking completionIterativeStep #{i}")
243 | 
244 |             ctx.set_stat('tools-flow:iterations', i + 1)
245 | 
246 |             is_final = (i == (max_iterations - 1))
247 | 
248 |             result = completion_iterative_step(ctx, messenger_name, deep_clone(history), prev_responses, is_final)
249 |             answer = result['answer']
250 |             tool = result['tool']
251 |             input_ = result['input']
252 |             prompt_tokens = result['prompt_tokens']
253 |             completion_tokens = result['completion_tokens']
254 | 
255 |             ctx.log(f"completionIterativeStep done, answer={answer} tool={tool} input={input_} prompt_tokens={prompt_tokens} completion_tokens={completion_tokens}" )
256 | 
257 |             if not answer and not tool:
258 |                 break
259 | 
260 |             prompt_tokens_total += prompt_tokens
261 |             completion_tokens_total += completion_tokens
262 | 
263 |             if answer:
264 |                 ctx.log(f"Answer returned: {answer}")
265 | 
266 |                 if successful_iterations > 0:
267 |                     answer = "\N{LEFT-POINTING MAGNIFYING GLASS}: " + answer
268 | 
269 |                 ctx.set_stat('tools-flow:success', True)
270 | 
271 |                 return Box({
272 |                     "response": answer,
273 |                     "promptTokens": prompt_tokens_total,
274 |                     "completionTokens": completion_tokens_total
275 |                 })
276 | 
277 |             if tool and input_:
278 |                 successful_iterations += 1
279 |                 ctx.set_stat('tools-flow:tool-invocations', successful_iterations)
280 | 
281 |                 ctx.log(f"Invoking TOOL {tool} with INPUT {input_}")
282 |                 response, brk = invoke_tool(ctx, tool, input_, message=messages[-1])
283 |                 if brk:
284 |                     return Box({
285 |                     "response": response,
286 |                     "promptTokens": prompt_tokens_total,
287 |                     "completionTokens": completion_tokens_total
288 |                 })
289 |                 prev_responses.append(f"INVOKED TOOL={tool}, TOOL_INPUT={input_}, ACCURACY=100%, INVOCATION DATE={datetime.datetime.now().date()} RESPONSE={response}")
290 | 
291 |     except Exception as e:
292 |         ctx.log({"e": e})
293 |         traceback.print_exc();
294 | 
295 |     ctx.log("getChatCompletionWithTools: failed generating customized reply, falling back to getChatCompletion.")
296 | 
297 |     ctx.set_stat('tools-flows:success', False)
298 | 
299 |     return get_chat_completion(ctx, messenger_name, messages, direct)
300 | 
301 | def completion_iterative_step(ctx, messenger_name, history, prev_responses, is_final : bool):
302 |     result = {'answer': None, 'tool': None, 'input': None, 'prompt_tokens': None, 'completion_tokens': None}
303 | 
304 |     messages = []
305 | 
306 |     new_request = {'role': 'user', 'content': ''}
307 |     new_request['content'] += 'Here is the chat so far:\n<yair1xigor>'
308 | 
309 |     for message in history:
310 |         speaker = 'R1X' if message['role'] == 'assistant' else 'Human'
311 |         new_request['content'] += f'\n<{speaker}>: {message["content"]}'
312 | 
313 |     new_request['content'] += '\n<R1X:></yair1xigor>'
314 | 
315 |     if prev_responses:
316 |         prev_responses_flat = '\n'.join(prev_responses)
317 |         new_request['content'] += f'\nhere is the data so far:\n\n<r1xdata>{prev_responses_flat}</r1xdata>\n'
318 | 
319 |     prep_message = get_prep_message(ctx, messenger_name, is_final)
320 |     messages.append(prep_message)
321 |     messages.append(prep_reply_message)
322 | 
323 |     messages.append(new_request)
324 | 
325 |     reply = get_chat_completion_core(ctx, messenger_name, messages)
326 |     result['prompt_tokens'] = reply.promptTokens
327 |     result['completion_tokens'] = reply.completionTokens
328 | 
329 |     if is_final:
330 |         result['answer'] = reply['response']
331 |         return result
332 | 
333 |     regex = re.compile(r'<yair1xigoresponse>(.*?)<\/yair1xigoresponse>', re.DOTALL)
334 |     matches = regex.search(reply['response'])
335 | 
336 |     if not matches:
337 |         return result
338 | 
339 |     json_reply = eval(matches.group(1))
340 |     ctx.log(f'completionIterativeStep: matched response: {json_reply}')
341 | 
342 |     result['answer'] = json_reply.get('ANSWER')
343 |     if result['answer']:
344 |         return result
345 | 
346 |     if json_reply.get('TOOL') and json_reply.get('TOOL_INPUT'):
347 |         result['tool'] = json_reply.get('TOOL')
348 |         result['input'] = json_reply.get('TOOL_INPUT')
349 |         return result
350 | 
351 |     return result
352 | 
353 | def chat_completion_create_wrap(ctx: Context, model, messages):
354 |     if model == 'gpt-4':
355 |         response = openai.ChatCompletion().create(model=model, messages=messages, temperature=0.2)
356 | 
357 |         return response
358 | 
359 |     if model == 'gpt-3.5-turbo':
360 |         # TODO: cleanup per issue #55
361 |         if os.environ['AZURE_OPENAI_KEY'] == '':
362 |             return openai.ChatCompletion().create(model=model, messages=messages, temperature=0.2)
363 | 
364 |         url = "https://r1x.openai.azure.com/openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-05-15"
365 | 
366 |         headers = {
367 |             "Content-Type": "application/json",
368 |             "api-key": os.environ['AZURE_OPENAI_KEY']
369 |         }
370 | 
371 |         data = {
372 |             "messages" : messages,
373 |             "temperature": 0.2
374 |         }
375 | 
376 |         response = requests.post(url, headers=headers, data=json.dumps(data)).json()
377 | 
378 |         ctx.log('Azure GPT 3.5 response:', response)
379 | 
380 |         content_filter_active = response.get('error', {}).get('code') == 'content_filter' or \
381 |                                 response.get('choices', [{}])[0].get('finish_reason') == 'content_filter'
382 | 
383 |         if content_filter_active:
384 |             ctx.log('Content filtering applied; falling back to OpenAI API.')
385 |             ctx.set_stat('completion:content-filter', True)
386 |             response = openai.ChatCompletion().create(model=model, messages=messages, temperature=0.2)
387 | 
388 |         return response
389 | 
390 |     ctx.log(f'chat_completion_create_wrap: unsupported completion model {model}.')
391 | 
392 |     assert False
393 | 
394 | def invoke_tool(ctx:Context, tool, input, message):
395 |     tool_canon = tool.strip().upper()
396 | 
397 |     if tool_canon.startswith('SEARCH'):
398 |         # Replace this with an appropriate call to the Serper module
399 |         ctx.log(f'Invoking Google search using SERPER, input={input}')
400 |         serper = google_serper.GoogleSerperAPIWrapper(serper_api_key=os.environ['SERPER_API_KEY'])
401 |         answer = serper.run(input)
402 |         ctx.log(f'SERPER search result: {answer}')
403 | 
404 |         return answer, False
405 | 
406 |     if tool_canon.startswith('WEATHER'):
407 |         answer = invoke_weather_search(ctx, input)
408 | 
409 |         return answer, False
410 |     
411 |     if tool_canon.startswith('ALERT'):
412 |         ctx.set_stat('tools-flow:tool-alert', 1)
413 |         invoke_alert_tool(ctx, input, message)
414 |         return "alert added successfully.", True
415 |         
416 | 
417 |     return None, False
418 | 
419 | def parse_geolocation(location_data):
420 |     regex = re.compile(r'^(\d+\.\d+)\° ([NSEW]),\s*(\d+\.\d+)\° ([NSEW])$')
421 |     match = regex.match(location_data)
422 | 
423 |     if not match:
424 |         return None
425 | 
426 |     lat = float(match.group(1)) * (-1 if match.group(2) == 'S' else 1)
427 |     lon = float(match.group(3)) * (-1 if match.group(4) == 'W' else 1)
428 | 
429 |     return Box({'lat': lat, 'lon': lon})
430 | 
431 | def invoke_weather_search(ctx:Context, input):
432 |     ctx.log(f'invokeWeatherSearch, input={input}')
433 | 
434 |     # Replace this with an appropriate call to the Serper module
435 |     # serper = Serper()
436 |     geo_prompt = f'{input} long lat'
437 |     ctx.log(f'Invoking geolocation search using SERPER, input={geo_prompt}')
438 | 
439 |     serper = google_serper.GoogleSerperAPIWrapper(serper_api_key=os.environ['SERPER_API_KEY'])
440 |     geo_res = serper.run(geo_prompt)
441 |     ctx.log(f'SERPER geolocation result: {geo_res}')
442 | 
443 |     geo = parse_geolocation(geo_res)
444 |     if not geo:
445 |         return None
446 | 
447 |     ctx.log(f'Geolocation: lat={geo.lat} lon={geo.lon}')
448 | 
449 |     w_res = requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={geo.lat}&longitude={geo.lon}&daily=temperature_2m_max,temperature_2m_min,precipitation_hours,precipitation_probability_max,windspeed_10m_max&forecast_days=3&timezone=auto')
450 |     w_res_json = w_res.json()
451 | 
452 |     return json.dumps(w_res_json['daily'])
453 | 
454 | def create_transcription(ctx:Context, mp3_file_path):
455 |     language = ctx.user_settings.get('transcription.lang', None)
456 |     ctx.log(f'createTranscription: preferred user language is {language}')
457 | 
458 |     t0 = time.time()
459 | 
460 |     transcript = openai.Audio.transcribe(
461 |         file = open(mp3_file_path, "rb"),
462 |         model = OPENAI_SPEECH_TO_TEXT_MODEL,
463 |         language = language
464 |     )
465 | 
466 |     transcription = transcript['text']
467 |     time_taken = int((time.time() - t0) * 1000)
468 | 
469 |     ctx.log(f'createTranscription: timeTaken={time_taken}ms transcription={transcription}')
470 | 
471 |     return transcription
472 | 


--------------------------------------------------------------------------------
/src/services/timers.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import datetime
 3 | import time
 4 | import traceback
 5 | from typing import Tuple
 6 | from infra import logger, utils 
 7 | from infra.context import Context
 8 | utils.load_env()
 9 | import db_models
10 | from services.messengers import messenger_factory
11 | 
12 | def invoke_alert_tool(ctx:Context, alert_args:Tuple[int, str], parsed_message):
13 |     messenger_chat_id = f"{parsed_message.source}:{parsed_message.chatId}"
14 |     timestamp = int(parsed_message.messageTimestamp.timestamp())
15 |     ref_id = parsed_message.messageId
16 |     
17 |     with db_models.Session() as session:
18 |         now = datetime.datetime.now()
19 |         delta_ts, topic = alert_args
20 |         timer_extra_data = {"topic":topic, "ref_id":ref_id}
21 |         trigger_ts = datetime.datetime.fromtimestamp(timestamp+ int(delta_ts))
22 |         timer = db_models.Timer(
23 |             chat_id=messenger_chat_id,
24 |             trigger_timestamp=trigger_ts, 
25 |             data=timer_extra_data,
26 |             created_at=now,
27 |             updated_at=now
28 |         )
29 | 
30 |         session.add(timer)
31 |         session.commit()
32 |         session.refresh(timer)
33 | 
34 |         session.close()
35 | 
36 |     return timer
37 | 
38 | def alert_users():
39 |     ctx = Context()
40 |     while True:
41 |         try:
42 |             now = datetime.datetime.utcnow()
43 |             with db_models.Session() as session:
44 |                 alerts = session.query(db_models.Timer).filter(db_models.Timer.trigger_timestamp <= now).all()
45 |             if alerts:
46 |                 ctx.log(f"[TIMER] found {len(alerts)} alerts")                
47 |                 
48 |                 for alert in alerts:
49 |                     try:
50 |                         topic = alert.data.get("topic", None)
51 |                         quote_id = alert.data.get("ref_id", None)
52 |                         messenger = messenger_factory.make_messenger(alert.chat_id)
53 |                         ctx.log(f"[TIMER] sending a timer message to chat id {alert.chat_id}")
54 |                         messenger.send_message(ctx, { 
55 |                             "kind": "text",
56 |                             "body": f"You asked me to remind you about {topic}" if topic else "You asked me to remind you",
57 |                             "quote_id":quote_id
58 |                         })
59 |                     except:
60 |                         ctx.log(f"[TIMER] failed to send alert {alert.id} to chat id:{alert.chat_id} ")
61 |                 delete_alerts(ctx, now)                        
62 |             time.sleep(5)
63 | 
64 |         except Exception as e:
65 |             logger.logger.error(f'Exception occurred; {e}; stack trace: ', traceback.format_exc()) 
66 | 
67 | def delete_alerts(ctx:Context, now:datetime.datetime) -> None:
68 |     with db_models.Session() as session:
69 |         session.query(db_models.Timer).filter(db_models.Timer.trigger_timestamp <= now).delete()
70 |         ctx.log("[TIMER] alerts deleted")
71 |         session.commit()   


--------------------------------------------------------------------------------
/src/services/token_prediction/token_predictor.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import tiktoken
 4 | 
 5 | # global variable to hold the encode objects between invocations
 6 | encoder = tiktoken.get_encoding("cl100k_base")
 7 | 
 8 | def _num_tokens_from_messages(messages):
 9 |     num_tokens = 0
10 |     for message in messages:
11 |         num_tokens += 4
12 |         for key, value in message.items():
13 |             num_tokens += len(encoder.encode(value))
14 |             if key == "name":
15 |                 num_tokens -= 1
16 | 
17 |     num_tokens += 2
18 |     num_tokens += 1
19 |     return num_tokens
20 | 
21 | def _get_message_tokens(message):
22 |     if len(message) == 0:
23 |         raise ValueError(f"message is malformed. It's {message} but doesn't have any keys")
24 | 
25 |     num_tokens = 0
26 |     num_tokens += 4
27 |     for key, value in message.items():
28 |         num_tokens += len(encoder.encode(value))
29 |         if key == "name":
30 |             num_tokens -= 1
31 | 
32 |     return num_tokens
33 | 
34 | def _get_message_index_upto_max_tokens(system_message, chat_messages, soft_token_limit, hard_token_limit):
35 |     num_tokens = 0
36 |     num_tokens += 2
37 |     num_tokens += 1
38 | 
39 |     include_system_message = False
40 |     start_index = len(chat_messages)
41 | 
42 |     if system_message != None:
43 |         num_tokens += _get_message_tokens(system_message)
44 | 
45 |     if num_tokens > hard_token_limit:
46 |         return [include_system_message, start_index]
47 | 
48 |     include_system_message = (system_message != None)
49 | 
50 |     num_messages = 0
51 | 
52 |     for start_index in range(len(chat_messages), 0, -1):
53 |         message = chat_messages[start_index - 1]
54 | 
55 |         num_tokens += _get_message_tokens(message)
56 | 
57 |         if num_tokens <= soft_token_limit:
58 |             num_messages += 1
59 |             continue
60 | 
61 |         if start_index == len(chat_messages) and num_tokens <= hard_token_limit:
62 |             num_messages += 1
63 |             continue
64 | 
65 |         break
66 | 
67 |     return [include_system_message, len(chat_messages) - num_messages]
68 | 
69 | def get_messages_upto_max_tokens(ctx, system_message, chat_messages, soft_token_limit, hard_token_limit):
70 |     ctx.log(f"getMessagesUptoMaxTokens: chatMessages.length={len(chat_messages)}, softTokenLimit={soft_token_limit}, hardTokenLimit={hard_token_limit}")
71 | 
72 |     include_system_message, start_index = _get_message_index_upto_max_tokens(system_message, chat_messages, soft_token_limit, hard_token_limit)
73 | 
74 |     result = [system_message] if include_system_message else []
75 | 
76 |     if start_index == len(chat_messages):
77 |         return result
78 | 
79 |     result += chat_messages[start_index:]
80 | 
81 |     return result
82 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/test/__init__.py


--------------------------------------------------------------------------------
/test/data/backref_user1.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "Is Pele alive?" },
3 |     { "role" : "assistant", "content" : "According to the latest information available, Pele passed away on December 29, 2022, at the Albert Einstein Israelite Hospital in São Paulo, State of São Paulo, Brazil." },
4 |     { "role" : "user", "content" : "what is its radius?" },
5 |     { "role" : "assistant", "content" : "I'm sorry, I'm not sure what you are referring to. Could you please provide more context or clarify your question?" },
6 |     { "role" : "user", "content" : "tennis ball" }
7 | ] }
8 | 
9 | 


--------------------------------------------------------------------------------
/test/data/brazil.json:
--------------------------------------------------------------------------------
 1 | { "messages" : [
 2 |     {
 3 |       "role": "assistant",
 4 |       "content": "ANSWER: I apologize, but I cannot provide any information about Guy Caspary, as I could not find any relevant or notable persons with that name using my search capabilities. Can you please provide any additional context or details that may help me better understand who you are asking about?"
 5 |     },
 6 |     { "role": "user", "content": "Who is Guy Caspary?" },
 7 |     {
 8 |       "role": "assistant",
 9 |       "content": "Guy Caspary is an individual who has a professional profile on LinkedIn. Beyond this, I could not find any additional information about him or his background using my tools."
10 |     },
11 |     {
12 |       "role": "user",
13 |       "content": "What's the weather in Milan like tomorrow?"
14 |     },
15 |     {
16 |       "role": "assistant",
17 |       "content": "The weather in Milan, Italy tomorrow, according to the most recent forecast, is expected to be partly cloudy with a high of 21°C (70°F) and a low of 9°C (48°F)."
18 |     },
19 |     { "role": "user", "content": "Who is Aviram Yeruchami?" },
20 |     {
21 |       "role": "assistant",
22 |       "content": "Aviram Yeruchami is a Director of Engineering at Cisco, specifically for the company's Leaba division. Not much else is available in the public domain about his personal or professional background."
23 |     },
24 |     { "role": "user", "content": "What should I wear tomorrow?" },
25 |     {
26 |       "role": "assistant",
27 |       "content": "Tool=search, tool_input=What is the weather forecast for tomorrow in my location?"
28 |     },
29 |     { "role": "user", "content": "What should I wear tomorrow?" },
30 |     {
31 |       "role": "assistant",
32 |       "content": "Based on the weather forecast for your location, I would recommend checking the expected high and low temperatures for tomorrow, as well as the expected weather conditions (i.e. sunny, rainy, cloudy) to determine what to wear. If it is expected to be warm and sunny, you may want to wear light and comfortable clothing, while if it is expected to be cooler and potentially rainy, you may want to wear a jacket or other appropriate clothing to keep you warm and dry."
33 |     },
34 |     { "role": "user", "content": "I am located in Milan." },
35 |     {
36 |       "role": "assistant",
37 |       "content": "Understood. Is there a specific question or topic you would like me to help with related to Milan?"
38 |     },
39 |     { "role": "user", "content": "What should I wear tomorrow?" },
40 |     {
41 |       "role": "assistant",
42 |       "content": "ANSWER: Based on the weather forecast for Milan, Italy, tomorrow, which is partly cloudy with highs of 21°C (70°F) and lows of 9°C (48°F), it is recommended that you wear comfortable clothing such as light shirts, pants or skirts, and a jacket or sweater to keep you warm in case the temperature drops. It's advisable to wear clothes in layers, to be prepared for fluctuations in temperatures."
43 |     },
44 | 
45 |     {
46 |       "role": "user",
47 |       "content": "Who is the current president of Brasil?"
48 |     }
49 | ] }
50 | 


--------------------------------------------------------------------------------
/test/data/comprehension/backref_ai1.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     {"role" : "user", "content" : "Tell me i'm nice and ask me if i like to hear it again"},
3 |     {"role" : "assistant", "content" : "You are nice. Would you like to hear it again?"},
4 |     {"role" : "user", "content" : "yes"}
5 | ] }
6 | 
7 | 


--------------------------------------------------------------------------------
/test/data/comprehension/backref_ai2.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     {"role" : "user", "content" : "Tell me i'm nice and ask me if i like to hear it again"},
3 |     {"role" : "assistant", "content" : "You are nice. Would you like to hear it again?"},
4 |     {"role" : "user", "content" : "no"}
5 | ] }
6 | 
7 | 


--------------------------------------------------------------------------------
/test/data/fiction/story.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "write me a story about Yair the Great, and Igor the Furious, two sages." }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/fiction/story2.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "write a story about Abigail, Yael and Renena, three princesses riding dragons." }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/finance/btc.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "what was the value of btc yesterday?" }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/finance/twostocks.json.fail:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "what was the value of intc and csco yesterday?" }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/misc/42.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "42" }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/pele1.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "Is Pele alive?" }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/ukpm.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "Who is the UK PM?" }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/data/weather1.json:
--------------------------------------------------------------------------------
1 | { "messages" : [
2 |     { "role" : "user", "content" : "Is it going to be cold tomorrow?" }
3 | ] }
4 | 
5 | 


--------------------------------------------------------------------------------
/test/local-test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import os
 4 | import sys
 5 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
 6 | 
 7 | from infra.utils import load_env
 8 | load_env()
 9 | 
10 | import sys
11 | import json
12 | import asyncio
13 | from pathlib import Path
14 | from infra.logger import logger, create_logging_context
15 | from services.open_ai.query_openai import get_chat_completion_with_tools
16 | 
17 | def run():
18 |     args = sys.argv[1:]
19 | 
20 |     # Check if the user specified any command line arguments
21 |     if not args:
22 |         print("No arguments provided.")
23 |         sys.exit(1)
24 | 
25 |     json_input = args[0]
26 | 
27 |     with open(json_input, 'r', encoding='utf-8') as file:
28 |         data = file.read()
29 |     history = json.loads(data)["messages"]
30 | 
31 |     ctx = create_logging_context(0)
32 |     ctx.user_channel = 'stable'
33 |     reply = get_chat_completion_with_tools(ctx, 'WhatsApp', history, True)
34 | 
35 |     print({'reply': reply})
36 | 
37 | run()
38 | 


--------------------------------------------------------------------------------
/test/test-token-predictor.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | const assert = require('assert');
 3 | require("dotenv").config();
 4 | const tokenPredictor = require(".././src/services/token-prediction/token-predictor");
 5 | 
 6 | async function main() {
 7 |     const systemMessage = {"role": "system", "content": "you are a helpful bot"};
 8 | 
 9 |     const chatMessages = [
10 |          {"role": "user", "content": "i'm your lord"},
11 |          {"role": "assistant", "content": "you'd wish"},
12 |          {"role": "user", "content": "מה אתה אומר"},
13 |          {"role": "assistant", "content": "מה שאתה שומע"}
14 |     ];
15 | 
16 |     // build the expected behavior. a map from number of tokens --> message list using the numTokensFromMessages predictor
17 |     const numTokensToMessages = new Map();
18 |     const previousMessages = [];
19 |     let previousNumTokens = 0;
20 |     const chatMessagesReversed = chatMessages.slice().reverse(); // make a copy of chatMessages and reverse it
21 |     
22 |     for (let numChatMessages = 0; numChatMessages <= chatMessagesReversed.length ; ++numChatMessages) {
23 |         // take the first numChatMessages from the end
24 |         const subsetChatMessagesReversed = chatMessagesReversed.slice(0, numChatMessages);
25 | 
26 |         // get back the normal order 
27 |         const subsetChatMessages = subsetChatMessagesReversed.slice().reverse();
28 | 
29 |         // build the full list message
30 |         const combinedMessages = [systemMessage, ...subsetChatMessages];
31 | 
32 |         // calculate their number of tokens
33 |         const numTokens = await tokenPredictor.numTokensFromMessages(combinedMessages);
34 | 
35 |         // fill out the entries of token sizes for the unset sizes so far in the message map
36 |         for (const token = previousNumTokens; token < numTokens; token++) {
37 |             numTokensToMessages[token] = previousMessages;
38 |         }
39 | 
40 |         // if this is the last iteration then nothing will fill in for it (no next), so do now
41 |         if (numChatMessages == chatMessagesReversed.length) {
42 |             
43 |             numTokensToMessages[numTokens] = combinedMessages;
44 |         }
45 | 
46 |         // update for next iteration
47 |         previousMessages = combinedMessages;
48 |         previousNumTokens = numTokens;
49 |     }
50 |     // console.log(numTokensToMessages);
51 |      
52 |       
53 |     // verify that the message selector picks up exactly the expected messages. Go +10 beyond the last previousNumTokens just for checks.
54 |     for (const maxTokens = 0; maxTokens < previousNumTokens+10; maxTokens ++) {
55 |         const actualMessages = await tokenPredictor.getMessagesUptoMaxTokens(systemMessage, chatMessages, maxTokens);
56 | 
57 |         //console.log(`maxTokens=${maxTokens}\n  actual=${JSON.stringify(actualMessages)}\nexpected=${JSON.stringify(numTokensToMessages[maxTokens])}`);
58 |         try {
59 |             // if the number of tokens still exist int the map, compare to the map
60 |             if (maxTokens <= previousNumTokens){
61 |                 assert.deepStrictEqual(actualMessages, numTokensToMessages[maxTokens]);
62 |             } else {
63 |                 //compare the the full list
64 |                 assert.deepStrictEqual(actualMessages, previousMessages);
65 |             }
66 |         } catch (error) {
67 |             console.log(`Actual and expected message list are different for ${maxTokens}\n`, error);
68 |             throw error;
69 |         }
70 |     }
71 |     
72 |     // Get the index of the message within the chatMessages list from which messages should be taken.
73 |     // const res = await tokenPredictor.getMessageIndexUptoMaxTokens(systemMessage, chatMessages, 17); console.log(res);
74 | 
75 |     // Get the number of tokens a list of messages should consume
76 |     // tokens = await tokenPredictor.numTokensFromMessages(combined);  console.log("numTokensFromMessages", tokens);
77 | }
78 | 
79 | main();
80 | 
81 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yairl/r1xbot/5323f61a4f77b0de20b3801537e9c10d84ba93b3/tools/__init__.py


--------------------------------------------------------------------------------
/tools/delete_chat.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import os
 4 | import sys
 5 | import json
 6 | import psycopg2
 7 | from dotenv import load_dotenv
 8 | from datetime import datetime
 9 | 
10 | def connect_to_db():
11 |     stage = os.environ['R1X_STAGE'] if 'R1X_STAGE' in os.environ else 'dev'
12 |     print('Connecting to %s environment...' % stage)
13 |     load_dotenv('.env.%s' % stage)
14 |     connection_string = os.getenv('DB_CONNECTION_STRING')
15 |     conn = psycopg2.connect(connection_string)
16 |     return conn
17 | 
18 | def delete_history(source, chat_id):
19 |     conn = connect_to_db()
20 |     cursor = conn.cursor()
21 | 
22 |     cursor.execute("DELETE FROM \"Messages\" WHERE source = %s AND \"chatId\" = %s", (source, chat_id,)) 
23 | 
24 |     conn.commit()
25 |     cursor.close()
26 |     conn.close()
27 | 
28 | if __name__ == "__main__":
29 |     if len(sys.argv) < 3:
30 |         print("Usage: python script.py source chat_id")
31 |         sys.exit(1)
32 | 
33 |     source = sys.argv[1]
34 |     chat_id = sys.argv[2]
35 | 
36 |     delete_history(source, chat_id)
37 | 
38 | 


--------------------------------------------------------------------------------
/tools/extract.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import re
 4 | import json
 5 | import argparse
 6 | 
 7 | def extract_messages(log_file, output_file):
 8 |     with open(log_file, 'r') as log, open(output_file, 'w') as out:
 9 |         log_content = log.read()
10 |         pattern = r"Starting getChatCompletionWithTools\.([\s\S]*?)(parsedMessages: \[[\s\S]*?\])"
11 |         matches = re.findall(pattern, log_content)
12 | 
13 |         if matches:
14 |             last_instance = matches[-1][-1]
15 | 
16 |             role_pattern = r"role: (?:'([^']*)'|\"([^\"]*)\")"
17 |             #content_pattern = r"content: (?:'([^']*)'|\"([^\"]*)\")"
18 |             content_pattern = r"content:\s*(?:'([^']+)'|\"([^\"]+)\")"
19 | 
20 |             roles = [role[0] or role[1] for role in re.findall(role_pattern, last_instance)]
21 |             contents = [content[0] + content[1] for content in re.findall(content_pattern, last_instance, re.MULTILINE | re.DOTALL )]
22 | 
23 |             messages = { "messages" : [{"role": role, "content": content} for role, content in zip(roles, contents)] }
24 | 
25 |             with open(output_file, 'w') as out:
26 |                 json.dump(messages, out, indent=2)
27 |         else:
28 |             print("No matching instances found in the log file.")
29 | 
30 | # Replace 'input.log' and 'output.json' with your actual log and output file names
31 | if __name__ == '__main__':
32 |     parser = argparse.ArgumentParser(description='Extract messages from a log file and save them to an output JSON file.')
33 |     parser.add_argument('--input', required=True, help='Path to the input log file.')
34 |     parser.add_argument('--output', required=True, help='Path to the output JSON file.')
35 | 
36 |     args = parser.parse_args()
37 | 
38 |     extract_messages(args.input, args.output)
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/tools/multi_sender.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import argparse
 4 | import sys
 5 | import os
 6 | from typing import Dict, List
 7 | 
 8 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
 9 | from infra import utils 
10 | from infra.context import Context
11 | from services.messengers.messenger_factory import make_messenger
12 | 
13 | utils.load_env()
14 | 
15 | 
16 | def multi_send(ctx:Context, full_chat_ids: List[str], attrs: Dict[str,str]):
17 |     for full_chat_id in full_chat_ids:
18 |         messenger = make_messenger(full_chat_id)
19 |         response = messenger.send_message(ctx, attrs)
20 |         print(response)
21 | 
22 |         should_send_contact = attrs['contact_name'] and attrs['contact_handle']
23 |         if should_send_contact:
24 |             response = messenger.send_contact(ctx, attrs['contact_name'], attrs['contact_handle'])
25 |             print(response)
26 | 
27 | if __name__ == '__main__':
28 |     parser = argparse.ArgumentParser(description='Send a message to multiple chat ids.')
29 |     
30 |     parser.add_argument('--message', required=False, help='Message string.')
31 |     parser.add_argument('--file', required=False, help='Message string, in file.')
32 |     parser.add_argument('--chat_ids', required=True, help='a comma seperated list of <messenger wa/tg>:<chat ids> e.g wa:12346578,tg:456789654 ')
33 |     parser.add_argument('--contact-name', required=False, action='store', help='''Send contact. Name is the contact's name.''')
34 |     parser.add_argument('--contact-handle', required=False, action='store', help='''Send contact. Handle is contact's handle in WhatsApp/Telegram.''')
35 | 
36 |     args = parser.parse_args()
37 |     
38 |     if not args.message and not args.file:
39 |         print('No message provided. Use --message or --file')
40 | 
41 |     if args.message:
42 |         msg = args.message
43 |     else:
44 |         msg = open(args.file, 'r').read()
45 | 
46 |     full_chat_ids=args.chat_ids.split(',')
47 | 
48 |     ctx = Context()
49 |     multi_send(ctx, full_chat_ids, {
50 |             "kind": "text",
51 |             "body": msg,
52 |             "contact_name" : args.contact_name,
53 |             "contact_handle" : args.contact_handle
54 |         })
55 |     
56 |     
57 | 


--------------------------------------------------------------------------------
/tools/stats.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import argparse
 4 | import dotenv
 5 | import numpy
 6 | import os
 7 | import psycopg2
 8 | import psycopg2.extras
 9 | 
10 | dotenv.load_dotenv('.env.prod')
11 | ps = psycopg2.connect(os.environ['DB_CONNECTION_STRING'])
12 | 
13 | cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor)
14 | 
15 | def get_message_count(start_date, end_date):
16 |     cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor)
17 | 
18 |     cur.execute('''SELECT COUNT(id) FROM "Messages" WHERE DATE("createdAt") >= '%s' AND DATE("createdAt") <= '%s';''' % (start_date, end_date))
19 |     message_count = cur.fetchall()[0][0]
20 | 
21 |     return message_count
22 | 
23 | def get_active_chats_count(start_date, end_date):
24 |     cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor)
25 | 
26 |     cur.execute('''SELECT COUNT(DISTINCT (source, "chatId")) FROM "Messages" WHERE DATE("createdAt") >= '%s' AND DATE("createdAt") <= '%s';''' % (start_date, end_date))
27 |     active_chat_count = cur.fetchall()[0][0]
28 | 
29 |     return active_chat_count
30 | 
31 | def get_active_chat_histogram(start_date, end_date):
32 |     cur = ps.cursor(cursor_factory=psycopg2.extras.DictCursor)
33 | 
34 |     # Select <source>:<chat>, so this data can be used later to send messages to specific users.
35 |     cur.execute('''SELECT source, "chatId", chat_id_count FROM (SELECT source, "chatId", COUNT(*) as chat_id_count FROM "Messages" WHERE DATE("createdAt") >= '%s' AND DATE("createdAt") <= '%s' GROUP BY source, "chatId") AS chat_count_table ORDER BY chat_id_count DESC;''' % (start_date, end_date))
36 | 
37 |     chats = []
38 |     for member in cur.fetchall():
39 |         chats.append(member)
40 | 
41 |     return chats
42 | 
43 | parser = argparse.ArgumentParser(description='Fetch statistics from R1X database.')
44 | parser.add_argument('--start-date', type=str, dest='start_date', help='Start date.', required=True)
45 | parser.add_argument('--end-date', type=str, dest='end_date', help='End date.', required=True)
46 | args = parser.parse_args()
47 | 
48 | num_msgs = get_message_count(args.start_date, args.end_date)
49 | print('Number of messages: ', num_msgs)
50 | 
51 | chats = get_active_chat_histogram(args.start_date, args.end_date)
52 | print('Active chats today: ', len(chats))
53 | 
54 | msg_arr = []
55 | 
56 | numbers = [] 
57 | 
58 | for chat in chats:
59 |     (source, chat_id, msgs) = chat
60 |     msg_arr.append(msgs)
61 | 
62 |     if source == 'wa':
63 |         numbers.append(f'{source}:{chat_id}')
64 | 
65 |     if msgs < 8:
66 |         continue
67 |     
68 |     print(source, chat_id, msgs)
69 | 
70 | print(','.join(numbers))
71 | 
72 | print(numpy.histogram(msg_arr, [0, 5, 10, 15, 20, 50, 100]))
73 | 
74 | 


--------------------------------------------------------------------------------
/tools/user_settings.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import os
  4 | import sys
  5 | import json
  6 | import psycopg2
  7 | from dotenv import load_dotenv
  8 | from datetime import datetime
  9 | 
 10 | def connect_to_db():
 11 |     stage = os.environ['R1X_STAGE'] if 'R1X_STAGE' in os.environ else 'dev'
 12 |     print('Connecting to %s environment...' % stage)
 13 |     load_dotenv('.env.%s' % stage)
 14 |     connection_string = os.getenv('DB_CONNECTION_STRING')
 15 |     conn = psycopg2.connect(connection_string)
 16 |     return conn
 17 | 
 18 | def get_settings(user_id):
 19 |     conn = connect_to_db()
 20 |     cursor = conn.cursor()
 21 | 
 22 |     cursor.execute("SELECT * FROM user_settings WHERE user_id = %s ORDER BY id DESC", (user_id,))
 23 |     row = cursor.fetchone()
 24 | 
 25 |     if row:
 26 |         print("Settings for user_id {}: {}".format(user_id, row))
 27 |     else:
 28 |         print("No settings found for user_id {}".format(user_id))
 29 | 
 30 |     cursor.close()
 31 |     conn.close()
 32 | 
 33 | def set_setting(user_id, key_value_pairs):
 34 |     conn = connect_to_db()
 35 |     cursor = conn.cursor()
 36 | 
 37 |     cursor.execute("SELECT settings FROM user_settings WHERE user_id = %s", (user_id,))
 38 |     row = cursor.fetchone()
 39 | 
 40 |     settings = row[0] if row else {}
 41 | 
 42 |     for pair in key_value_pairs:
 43 |         key, value = pair.split("=")
 44 |         settings[key] = value
 45 | 
 46 |     cursor.execute('INSERT INTO user_settings (user_id, settings, version, "createdAt", "updatedAt") VALUES (%s, %s, 1, %s, %s)',
 47 |                    (user_id, json.dumps(settings), datetime.now(), datetime.now()))
 48 | 
 49 |     conn.commit()
 50 |     cursor.close()
 51 |     conn.close()
 52 | 
 53 | def clear_setting(user_id, key):
 54 |     conn = connect_to_db()
 55 |     cursor = conn.cursor()
 56 | 
 57 |     cursor.execute("SELECT settings FROM user_settings WHERE user_id = %s", (user_id,))
 58 |     row = cursor.fetchone()
 59 | 
 60 |     if row:
 61 |         settings = row[0]
 62 |         if key in settings:
 63 |             del settings[key]
 64 |             cursor.execute("UPDATE user_settings SET settings = %s WHERE user_id = %s", (json.dumps(settings), user_id))
 65 |             conn.commit()
 66 |         else:
 67 |             print("Key not found in settings for user_id {}".format(user_id))
 68 |     else:
 69 |         print("No settings found for user_id {}".format(user_id))
 70 | 
 71 |     cursor.close()
 72 |     conn.close()
 73 | 
 74 | if __name__ == "__main__":
 75 |     if len(sys.argv) < 3:
 76 |         print("Usage: python script.py [get|set|clear] user_id [key=value [key=value]...]")
 77 |         sys.exit(1)
 78 | 
 79 |     action = sys.argv[1]
 80 |     user_id = sys.argv[2]
 81 | 
 82 |     if action == "get":
 83 |         get_settings(user_id)
 84 |     elif action == "set":
 85 |         if len(sys.argv) < 4:
 86 |             print("Usage: python script.py set user_id key=value [key=value]...")
 87 |             sys.exit(1)
 88 | 
 89 |         key_value_pairs = sys.argv[3:]
 90 |         set_setting(user_id, key_value_pairs)
 91 |     elif action == "clear":
 92 |         if len(sys.argv) < 4:
 93 |             print("Usage: python script.py clear user_id key")
 94 |             sys.exit(1)
 95 | 
 96 |         key = sys.argv[3]
 97 |         clear_setting(user_id, key)
 98 |     else:
 99 |         print("Invalid action. Use get, set, or clear.")
100 |         sys.exit(1)
101 | 
102 | 


--------------------------------------------------------------------------------