├── .catgitinclude
├── requirements.txt
├── src
│   ├── extras
│   │   ├── README.md
│   │   ├── bot_token_test.py
│   │   └── api_fetch_news.py
│   ├── token_usage_visualization.py
│   ├── rss_feeds.py
│   ├── api_get_global_time.py
│   ├── rag_elasticsearch
│   │   ├── elasticsearch_backend_search.py
│   │   ├── backup_database.py
│   │   ├── elasticsearch_find_empty_question_fields.py
│   │   ├── elasticsearch_find_and_delete_entry.py
│   │   ├── elasticsearch_test_search.py
│   │   ├── review_and_fix_entries.py
│   │   └── qa_to_json.py
│   ├── api_get_time.py
│   ├── api_get_additional_weather_data.py
│   ├── timedate_handler.py
│   ├── configmerger.py
│   ├── perplexity_handler.py
│   ├── api_get_maptiler.py
│   ├── api_key.py
│   ├── calc_module.py
│   ├── api_get_website_dump.py
│   ├── utils.py
│   ├── reminder_poller.py
│   ├── api_get_openrouteservice.py
│   ├── config_paths.py
│   ├── api_get_stock_prices.py
│   ├── api_get_stock_prices_alphavantage.py
│   ├── url_handler.py
│   ├── voice_message_handler.py
│   ├── elasticsearch_handler.py
│   ├── api_get_nws_weather.py
│   ├── api_get_weatherapi.py
│   ├── bot_token.py
│   ├── reminder_handler.py
│   ├── api_perplexity_search.py
│   └── bot_commands.py
├── docker_deploy.sh
├── .gitignore
├── Dockerfile
├── .github
│   └── workflows
│       └── build-and-push.yml
├── docker_setup.sh
└── config
    └── config.ini

/.catgitinclude:
--------------------------------------------------------------------------------
1 | # to include in `catgit` (see https://github.com/FlyingFathead/catgit for more)
2 | src/db_utils.py
3 | src/bot_token.py
4 | src/config_paths.py
5 | src/custom_functions.py
6 | src/main.py
7 | src/modules.py
8 | src/text_message_handler.py
9 | src/utils.py
10 | config/config.ini
11 | src/reminder_poller.py
12 | src/reminder_handler.py
13 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | apscheduler>=3.11.0
2 | beautifulsoup4>=4.12.3
3 | configparser>=6.0.0
4 | elastic-transport>=8.15.0
5 | elasticsearch>=8.15.1
6 | ffmpeg-python>=0.2.0
7 | httpx>=0.25.2
8 | langdetect>=1.0.9
9 | matplotlib>=3.8.2
10 | holidays>=0.49
11 | lxml>=5.2.2
12 | nltk>=3.8.1
13 | openai>=1.6.1
14 | pydub>=0.25.1
15 | python-telegram-bot>=20.7
16 | transformers>=4.36.2
17 | requests>=2.31.0
18 | pytz>=2024.1
19 | timezonefinder>=6.4.0
20 | yfinance>=0.2.41
21 | yt-dlp>=2024.3.10
22 | feedparser>=6.0.11
23 | tiktoken>=0.7.0
--------------------------------------------------------------------------------
/src/extras/README.md:
--------------------------------------------------------------------------------
1 | # Extras
2 | 
3 | This directory contains experimental or unimplemented modules. The scripts here are works in progress and may not be fully functional yet. They are included for future development and testing purposes.
4 | 
5 | ## Contents
6 | 
7 | - **`api_fetch_news.py`**
8 |   A work-in-progress script intended to fetch news articles from various sources via APIs. Not yet fully implemented or integrated.
9 | 
10 | ## Notes
11 | 
12 | - These modules are not part of the core functionality of the bot and may change significantly as development continues.
13 | - Feel free to experiment with these modules, but please be aware that they may contain bugs or incomplete features.
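## Quick check

As a quick sanity check (a suggested workflow, assuming you export the token in the shell you run the bot from), `bot_token_test.py` below verifies that `TELEGRAM_BOT_TOKEN` is visible to the process and exits non-zero if it is not:

```bash
export TELEGRAM_BOT_TOKEN="123456:ABC..."   # placeholder, use your own token
python src/extras/bot_token_test.py
```

Only the first four characters of the token are logged, so the output is safe to paste into bug reports.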
14 | 15 | -------------------------------------------------------------------------------- /src/extras/bot_token_test.py: -------------------------------------------------------------------------------- 1 | # test to see if your TG bot token is available in the environment 2 | 3 | import os 4 | import logging 5 | 6 | # Set up basic logging 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | def get_bot_token(): 10 | bot_token = os.getenv('TELEGRAM_BOT_TOKEN') 11 | if not bot_token: 12 | logging.error("Failed to retrieve TELEGRAM_BOT_TOKEN from environment.") 13 | return None 14 | return bot_token 15 | 16 | if __name__ == "__main__": 17 | token = get_bot_token() 18 | if token: 19 | logging.info(f"Successfully retrieved bot token: {token[:4]}... (masked for security)") 20 | else: 21 | logging.critical("No bot token found. Exiting.") 22 | exit(1) 23 | -------------------------------------------------------------------------------- /src/token_usage_visualization.py: -------------------------------------------------------------------------------- 1 | # token_usage_visualization.py 2 | 3 | import matplotlib.pyplot as plt 4 | import json 5 | 6 | def generate_usage_chart(token_usage_file, output_image_file): 7 | try: 8 | with open(token_usage_file, 'r') as file: 9 | data = json.load(file) 10 | 11 | dates = list(data.keys()) 12 | usage = list(data.values()) 13 | 14 | plt.figure(figsize=(10, 6)) 15 | plt.bar(dates, usage, color='blue') 16 | plt.xlabel('Date') 17 | plt.ylabel('Token Usage') 18 | plt.xticks(rotation=45) 19 | plt.title('Daily Token Usage') 20 | plt.tight_layout() 21 | plt.savefig(output_image_file) 22 | 23 | except Exception as e: 24 | print(f"Error generating usage chart: {e}") 25 | return None 26 | -------------------------------------------------------------------------------- /src/rss_feeds.py: -------------------------------------------------------------------------------- 1 | # rss_feeds.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | import feedparser 7 | from utils import sanitize_html, split_message 8 | 9 | RSS_FEED_URLS = { 10 | 'is_tuoreimmat': 'https://www.is.fi/rss/tuoreimmat.xml', 11 | 'your_custom_rss': 'http://example.com/rss' 12 | # Add more RSS feed URLs as needed 13 | } 14 | 15 | async def fetch_rss_feed(feed_key): 16 | """Fetch and format the RSS feed based on the feed key.""" 17 | feed_url = RSS_FEED_URLS.get(feed_key) 18 | if not feed_url: 19 | return f"Unknown RSS feed key: {feed_key}" 20 | 21 | feed = feedparser.parse(feed_url) 22 | formatted_entries = "\n".join([f"{entry.title}: {entry.link}" for entry in feed.entries[:5]]) 23 | return formatted_entries 24 | -------------------------------------------------------------------------------- /src/extras/api_fetch_news.py: -------------------------------------------------------------------------------- 1 | # api_fetch_news.py 2 | 3 | import httpx 4 | 5 | async def fetch_news(api_key: str, query: str): 6 | url = "https://newsapi.org/v2/everything" 7 | params = { 8 | "q": query, 9 | "apiKey": api_key, 10 | "language": "en", 11 | } 12 | async with httpx.AsyncClient() as client: 13 | response = await client.get(url, params=params) 14 | if response.status_code == 200: 15 | news_data = response.json() 16 | articles = news_data.get("articles", []) 17 | messages = [] 18 | for article in articles[:5]: # Limit to the first 5 articles 19 | title = article["title"] 20 | url = article["url"] 21 | 
messages.append(f"{title}\nRead more: {url}") 22 | return "\n\n".join(messages) 23 | else: 24 | return "Failed to fetch news." 25 | -------------------------------------------------------------------------------- /docker_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONTAINER_NAME="telegrambot-openai-api" 4 | IMAGE_NAME="telegrambot-openai-api" 5 | 6 | # Function to stop and remove existing container 7 | cleanup() { 8 | echo "Stopping container if it's running..." 9 | sudo docker stop ${CONTAINER_NAME} || true 10 | 11 | echo "Removing container if it exists..." 12 | sudo docker rm ${CONTAINER_NAME} || true 13 | } 14 | 15 | # Function to build and run the container 16 | deploy() { 17 | echo "Building Docker image..." 18 | sudo docker build --no-cache -t ${IMAGE_NAME} . 19 | if [[ $? -ne 0 ]]; then 20 | echo "Error: Docker image build failed." 21 | exit 1 22 | fi 23 | 24 | echo "Running Docker container..." 25 | sudo docker run --env-file .env --name ${CONTAINER_NAME} -d ${IMAGE_NAME} 26 | if [[ $? -ne 0 ]]; then 27 | echo "Error: Failed to run the Docker container." 28 | exit 1 29 | fi 30 | 31 | echo "Deployment complete." 32 | } 33 | 34 | # Execute the functions 35 | cleanup 36 | deploy 37 | -------------------------------------------------------------------------------- /src/api_get_global_time.py: -------------------------------------------------------------------------------- 1 | # api_get_global_time.py 2 | 3 | import subprocess 4 | 5 | TIMEZONES = [ 6 | "UTC", "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", 7 | "Europe/London", "Europe/Paris", "Europe/Berlin", "Europe/Helsinki", "Asia/Tokyo", 8 | "Asia/Shanghai", "Australia/Sydney", "Asia/Kolkata", "America/Sao_Paulo" 9 | ] 10 | 11 | def get_time_for_timezone(timezone): 12 | try: 13 | command = f"TZ={timezone} date +'%Y-%m-%d %H:%M:%S %Z'" 14 | result = subprocess.run(command, shell=True, capture_output=True, text=True) 15 | 16 | if result.returncode != 0: 17 | return f"Failed to fetch time for timezone {timezone}: {result.stderr.strip()}" 18 | 19 | return result.stdout.strip() 20 | except Exception as e: 21 | return f"Error executing date command for timezone {timezone}: {str(e)}" 22 | 23 | async def get_global_time(): 24 | times = {} 25 | for timezone in TIMEZONES: 26 | times[timezone] = get_time_for_timezone(timezone) 27 | return times -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore the cookies.txt file in config 2 | cookies.txt 3 | config/cookies.txt 4 | 5 | # Ignore all token files 6 | # i.e. files named bot_token.txt and api_token.txt, etc. 
7 | **/api_token.txt 8 | **/bot_token.txt 9 | config/*.txt 10 | config/bot_token.txt 11 | config/api_token.txt 12 | api_token.txt 13 | bot_token.txt 14 | chat_history.txt 15 | **/chat_history.txt 16 | 17 | # Bot-specific data 18 | token_usage.json 19 | **/data/ 20 | data/ 21 | 22 | # Log files 23 | logs/ 24 | **/logs/ 25 | chat.log 26 | bot.log 27 | *.log 28 | 29 | # Python cache files 30 | **/__pycache__/ 31 | __pycache__/ 32 | *.pyc 33 | 34 | # audio messages directory 35 | audio_messages/ 36 | **/audio_messages/ 37 | 38 | # yt-dlp cache directory 39 | .cache/ 40 | **/.cache/ 41 | 42 | # (alt 43 | # transcriptions directory 44 | transcriptions/ 45 | # audio files directory 46 | audio/ 47 | 48 | # ignore potential transcribed files 49 | *.ogg 50 | *.mp3 51 | *.wav 52 | *.aiff 53 | *.aac 54 | *.mp4 55 | *.vtt 56 | *.srt 57 | *.part 58 | 59 | # Ignore virtual environments 60 | venv/ 61 | env/ 62 | .env -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | 3 | # Install dependencies & Rust 4 | RUN apt-get update && apt-get install -y \ 5 | ffmpeg \ 6 | lynx \ 7 | gcc \ 8 | git \ 9 | curl \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Install Rust using rustup 14 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 15 | 16 | # Add Rust to PATH 17 | ENV PATH="/root/.cargo/bin:${PATH}" 18 | 19 | WORKDIR /app 20 | 21 | # Copy the requirements file first to leverage Docker cache 22 | COPY requirements.txt . 23 | 24 | # Install Python dependencies 25 | RUN pip3 install --no-cache-dir -r requirements.txt 26 | 27 | # Remove build dependencies to reduce image size 28 | RUN apt-get update && apt-get remove -y curl gcc git && apt-get autoremove -y && \ 29 | rm -rf /root/.cargo /root/.rustup /var/lib/apt/lists/* 30 | 31 | # Copy the entire project into the container 32 | COPY . . 
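# Note: `COPY . .` pulls in the entire build context, so any source change
# invalidates this layer; requirements.txt was copied separately above precisely
# so the dependency-install layer stays cached. Adding a .dockerignore for
# logs/, data/, venv/ and .env (none is shown in this dump, so this is a
# suggestion rather than existing repo policy) would keep those out of the image.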
33 | 34 | # Set environment variables for Docker runtime 35 | ENV PYTHONUNBUFFERED=1 36 | ENV RUNNING_IN_DOCKER=true 37 | 38 | # Optional: Debugging tools (disable in production) 39 | # RUN ls -lsa 40 | # RUN pwd 41 | 42 | # Default command to run the application 43 | CMD ["python3", "src/main.py"] 44 | -------------------------------------------------------------------------------- /src/rag_elasticsearch/elasticsearch_backend_search.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_backend_search.py 2 | 3 | from elasticsearch import Elasticsearch 4 | 5 | # Function to search Elasticsearch 6 | def search_es(es, index, field, search_term): 7 | query = { 8 | "query": { 9 | "wildcard": { 10 | field: f"*{search_term}*" 11 | } 12 | }, 13 | "size": 5 14 | } 15 | response = es.search(index=index, body=query) 16 | return response 17 | 18 | # Connect to Elasticsearch 19 | es = Elasticsearch(["http://localhost:9200"]) 20 | 21 | # Check the connection 22 | if es.ping(): 23 | print("Connected to Elasticsearch!") 24 | else: 25 | print("Could not connect to Elasticsearch.") 26 | exit(1) 27 | 28 | # Ask user for search term 29 | search_term = input("Enter search term: ") 30 | 31 | # Define the index and field to search on 32 | index = "tg-bot-rag-index" # Replace with your index name 33 | field = "content" # Replace with the field you want to search 34 | 35 | # Perform the search 36 | result = search_es(es, index, field, search_term) 37 | 38 | # Print the search results in a Discord-friendly format 39 | print("Search Results:") 40 | for hit in result['hits']['hits']: 41 | print(f"Document ID: {hit['_id']}\nSnippet: {hit['_source'][field][:200]}...") # Print the first 200 characters 42 | print("---") -------------------------------------------------------------------------------- /src/rag_elasticsearch/backup_database.py: -------------------------------------------------------------------------------- 1 | # backup_database.py 2 | # backup database into a json dump (recommended!) 
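# Usage: run directly, e.g. `python backup_database.py` (no arguments; assumes
# Elasticsearch is reachable at http://localhost:9200, as configured below).
# Output goes to a timestamped file named current_backup_YYYYMMDD_HHMMSS.json.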
3 | 
4 | import json
5 | from elasticsearch import Elasticsearch, helpers
6 | from datetime import datetime
7 | 
8 | # Connect to Elasticsearch
9 | es = Elasticsearch(["http://localhost:9200"])
10 | index_name = 'tg-bot-rag-index'
11 | 
12 | def backup_current_state(es, index_name):
13 |     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
14 |     backup_file = f'current_backup_{timestamp}.json'
15 | 
16 |     query = {
17 |         "query": {
18 |             "match_all": {}
19 |         },
20 |         "size": 10000
21 |     }
22 | 
23 |     response = es.search(index=index_name, body=query, scroll='2m')
24 |     scroll_id = response['_scroll_id']
25 |     hits = response['hits']['hits']
26 | 
27 |     all_hits = []
28 |     all_hits.extend(hits)
29 | 
30 |     while len(hits) > 0:
31 |         response = es.scroll(scroll_id=scroll_id, scroll='2m')
32 |         scroll_id = response['_scroll_id']
33 |         hits = response['hits']['hits']
34 |         all_hits.extend(hits)
35 | 
36 |     # Collect all documents into a list
37 |     all_documents = [hit["_source"] for hit in all_hits]
38 | 
39 |     # Write the list of documents to the backup file
40 |     with open(backup_file, 'w', encoding='utf-8', errors='replace') as f:
41 |         json.dump(all_documents, f, ensure_ascii=False, indent=4)
42 | 
43 |     print(f"Backup completed to {backup_file}")
44 | 
45 | backup_current_state(es, index_name)
46 | 
--------------------------------------------------------------------------------
/src/rag_elasticsearch/elasticsearch_find_empty_question_fields.py:
--------------------------------------------------------------------------------
1 | # elasticsearch_find_empty_question_fields.py
2 | 
3 | from elasticsearch import Elasticsearch
4 | 
5 | def find_empty_questions(index_name):
6 |     es = Elasticsearch(["http://localhost:9200"])  # Adjust the connection details as necessary
7 | 
8 |     query = {
9 |         "query": {
10 |             "bool": {
11 |                 "should": [
12 |                     {"bool": {"must_not": {"exists": {"field": "question"}}}},
13 |                     {"term": {"question.keyword": ""}},
14 |                     {"script_score": {
15 |                         "query": {"match_all": {}},
16 |                         "script": {
17 |                             "source": "if (doc['question'].size() == 0) return 1; return doc['question'].value == null || doc['question'].value.isEmpty() ?
1 : 0;", 18 | "lang": "painless" 19 | } 20 | }} 21 | ], 22 | "minimum_should_match": 1 23 | } 24 | } 25 | } 26 | 27 | response = es.search(index=index_name, body=query) 28 | print(f"Found {response['hits']['total']['value']} documents with empty or missing 'question' fields.") 29 | 30 | # Example handling: Print out the document IDs 31 | for doc in response['hits']['hits']: 32 | print(f"Document ID: {doc['_id']}") 33 | 34 | if __name__ == "__main__": 35 | index_name = "tg-bot-rag-index" # Replace with your index 36 | # index_name = "your_index_name" # Replace with your actual index name 37 | find_empty_questions(index_name) -------------------------------------------------------------------------------- /.github/workflows/build-and-push.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push to GHCR 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | REGISTRY: ghcr.io 11 | IMAGE_NAME: flyingfathead/telegrambot-openai-api 12 | 13 | jobs: 14 | build-and-push: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: read 18 | packages: write 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | 24 | - name: Log in to the Container registry 25 | uses: docker/login-action@v3 26 | with: 27 | registry: ${{ env.REGISTRY }} 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Set up Docker Buildx 32 | uses: docker/setup-buildx-action@v3 33 | 34 | - name: Build and push Docker image 35 | uses: docker/build-push-action@v5 36 | with: 37 | context: . 38 | push: true 39 | tags: | 40 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest 41 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} 42 | cache-from: type=gha 43 | cache-to: type=gha,mode=max 44 | 45 | - name: Stop and remove any existing container 46 | run: | 47 | docker stop telegrambot-openai-api || true 48 | docker rm telegrambot-openai-api || true 49 | 50 | - name: Run Docker container with environment variables 51 | run: | 52 | docker run \ 53 | --name telegrambot-openai-api \ 54 | --env OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} \ 55 | --env TELEGRAM_BOT_TOKEN=${{ secrets.TELEGRAM_BOT_TOKEN }} \ 56 | -d ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest 57 | -------------------------------------------------------------------------------- /src/rag_elasticsearch/elasticsearch_find_and_delete_entry.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_find_and_delete_entry.py 2 | 3 | # find and delete mistaken entries from the elasticsearch database 4 | 5 | from elasticsearch import Elasticsearch 6 | 7 | def search_qa_pairs(es, index_name, search_term): 8 | query = { 9 | "query": { 10 | "multi_match": { 11 | "query": search_term, 12 | "fields": ["question", "answer"] 13 | } 14 | } 15 | } 16 | response = es.search(index=index_name, body=query) 17 | return response['hits']['hits'] 18 | 19 | def delete_document(es, index_name, doc_id): 20 | response = es.delete(index=index_name, id=doc_id) 21 | return response 22 | 23 | def main(): 24 | es = Elasticsearch(["http://localhost:9200"]) 25 | index_name = "tg-bot-rag-index" # Adjust the index name as needed 26 | 27 | search_term = input("Enter a search term to find Q&A pairs: ") 28 | hits = search_qa_pairs(es, index_name, search_term) 29 | 30 | if hits: 31 | print("Found Q&A pairs:") 32 | for hit in hits: 33 | print(f"ID: {hit['_id']}, Question: 
{hit['_source']['question']}, Answer: {hit['_source']['answer']}") 34 | 35 | delete_id = input("Enter the ID of the document to delete (leave blank to cancel): ").strip() 36 | if delete_id: 37 | confirm = input(f"Are you sure you want to delete the document with ID {delete_id}? (y/n): ").strip().lower() 38 | if confirm == 'y': 39 | response = delete_document(es, index_name, delete_id) 40 | print(f"Document with ID {delete_id} has been deleted. Response: {response}") 41 | else: 42 | print("Deletion cancelled.") 43 | else: 44 | print("No Q&A pairs found with the given search term.") 45 | 46 | if __name__ == "__main__": 47 | main() -------------------------------------------------------------------------------- /src/api_get_time.py: -------------------------------------------------------------------------------- 1 | # api_get_time.py 2 | # for fetching time according to coordinates; placeholder/WIP 3 | 4 | from timezonefinder import TimezoneFinder 5 | from datetime import datetime 6 | import pytz 7 | import httpx # For making requests to a geocoding API 8 | 9 | def get_coordinates_for_location(location_name: str) -> tuple: 10 | 11 | # Fetches the latitude and longitude for a given location name. 12 | # This function uses a geocoding API to convert location names to coordinates. 13 | # Replace 'Your_API_Key_Here' with your actual API key for the geocoding service. 14 | 15 | api_url = f"https://api.opencagedata.com/geocode/v1/json?q={location_name}&key=Your_API_Key_Here" 16 | try: 17 | response = httpx.get(api_url) 18 | data = response.json() 19 | # Extracting the first result as an example. You might want to refine this for accuracy. 20 | coordinates = data['results'][0]['geometry'] 21 | return coordinates['lat'], coordinates['lng'] 22 | except Exception as e: 23 | print(f"Error fetching coordinates for location '{location_name}': {e}") 24 | return None, None 25 | 26 | # Determines the local time for a given location name. 27 | def get_local_time_for_location(location_name: str) -> str: 28 | 29 | lat, lng = get_coordinates_for_location(location_name) 30 | if lat is None or lng is None: 31 | return "Could not determine the coordinates for the location." 32 | 33 | # Find the time zone for the given coordinates 34 | tf = TimezoneFinder() 35 | timezone_str = tf.timezone_at(lat=lat, lng=lng) 36 | if timezone_str is None: 37 | return "Could not determine the time zone for the location." 
38 | 39 | # Get the current time in the determined time zone 40 | timezone = pytz.timezone(timezone_str) 41 | local_time = datetime.now(timezone) 42 | return local_time.strftime('%Y-%m-%d %H:%M:%S %Z%z') -------------------------------------------------------------------------------- /src/rag_elasticsearch/elasticsearch_test_search.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_test_search.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | from elasticsearch import Elasticsearch 7 | 8 | # Function to search Elasticsearch 9 | def search_es(es, index, field, search_term): 10 | query = { 11 | "query": { 12 | "bool": { 13 | "should": [ 14 | {"match_phrase": {field: {"query": search_term, "slop": 50}}}, 15 | {"match": {field: {"query": search_term, "operator": "or"}}} 16 | ] 17 | } 18 | }, 19 | "highlight": { 20 | "fields": { 21 | field: { 22 | "fragment_size": 200, 23 | "number_of_fragments": 5, 24 | "max_analyzed_offset": 1000000 # Adjust this value as needed 25 | } 26 | }, 27 | "pre_tags": ["["], 28 | "post_tags": ["]"] 29 | } 30 | } 31 | response = es.search(index=index, body=query, size=10) 32 | return response 33 | 34 | # Connect to Elasticsearch 35 | es = Elasticsearch(["http://localhost:9200"]) 36 | 37 | # Check the connection 38 | if es.ping(): 39 | print("Connected to Elasticsearch!") 40 | else: 41 | print("Could not connect to Elasticsearch.") 42 | exit(1) 43 | 44 | # Ask user for search term 45 | search_term = input("Enter search term: ") 46 | 47 | # Define the index and field to search on 48 | index = "tg-bot-rag-index" # Replace with your index 49 | field = "content" # Replace with the field you want to search 50 | 51 | # Perform the search 52 | result = search_es(es, index, field, search_term) 53 | 54 | # Print the search results 55 | print("Search Results:") 56 | for hit in result['hits']['hits']: 57 | # print("Document ID:", hit["_id"]) 58 | # print("Score:", hit["_score"]) # Optional: Display the relevance score 59 | if "highlight" in hit: 60 | print("Highlighted Snippets:") 61 | for highlight in hit["highlight"][field]: 62 | print(highlight) 63 | print("---\n") -------------------------------------------------------------------------------- /src/api_get_additional_weather_data.py: -------------------------------------------------------------------------------- 1 | # api_get_additional_weather_data.py 2 | 3 | import logging 4 | import sys 5 | import os 6 | import re 7 | import subprocess 8 | import asyncio 9 | 10 | ## NOTE: this is ONLY for example purposes! 11 | async def get_additional_data_dump(): 12 | try: 13 | # Execute the lynx command and capture the output 14 | command = 'lynx --dump -nolist https://www.foreca.fi/' 15 | process = await asyncio.create_subprocess_shell( 16 | command, 17 | stdout=asyncio.subprocess.PIPE, 18 | stderr=asyncio.subprocess.PIPE 19 | ) 20 | 21 | stdout, stderr = await process.communicate() 22 | 23 | if stderr: 24 | logging.error(f"Error in get_additional_data_dump: {stderr.decode()}") 25 | return "Error fetching data." 
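        # NOTE: lynx can emit warnings on stderr even when the dump itself
        # succeeds, so treating any stderr output as fatal is quite conservative.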
26 | 
27 |         output = stdout.decode()
28 | 
29 |         # Regular expressions to trim the output
30 |         start_marker = r'Suomen sää juuri nyt'
31 |         end_marker = r'Foreca YouTubessa'
32 |         trimmed_output = re.search(rf'{start_marker}(.*?){end_marker}', output, re.DOTALL)
33 | 
34 |         # Return the trimmed output if markers are found
35 |         if trimmed_output:
36 |             debug_output = trimmed_output.group(1)
37 | 
38 |             # Parsing the specific weather forecast section
39 |             parsed_forecast = parse_foreca_data(debug_output)
40 | 
41 |             # Format the parsed data for output
42 |             formatted_forecast = f"{parsed_forecast}"
43 | 
44 |             # Print the output for debugging
45 |             logging.info(formatted_forecast)
46 | 
47 |             return formatted_forecast
48 |         else:
49 |             return "Start or stop marker not found in the output."
50 | 
51 |     except Exception as e:
52 |         # Handle errors (e.g., lynx not installed, network issues)
53 |         logging.error(f"Exception in get_additional_data_dump: {e}")
54 |         return str(e)
55 | 
56 | def parse_foreca_data(data):
57 |     # Regular expressions to identify the start and end of the desired section
58 |     start_marker = r'Sääennuste koko maahan'
59 |     end_marker = r'Lähipäivien sää'
60 | 
61 |     # Extract the section
62 |     forecast_section = re.search(rf'{start_marker}(.*?){end_marker}', data, re.DOTALL)
63 |     if forecast_section:
64 |         forecast_data = forecast_section.group(1).strip()
65 |         # Further parsing can be done here to extract regional forecasts
66 |         # Format the data for output
67 |         return forecast_data
68 |     else:
69 |         return "Relevant weather forecast section not found."
70 | 
71 | # Example usage
72 | if __name__ == "__main__":
73 |     # Run the async function inside asyncio.run()
74 |     result = asyncio.run(get_additional_data_dump())
75 | 
76 |     # Print the result
77 |     print(result)
78 | 
--------------------------------------------------------------------------------
/src/timedate_handler.py:
--------------------------------------------------------------------------------
1 | # timedate_handler.py
2 | import datetime
3 | import pytz
4 | 
5 | # Maps English day names from strftime() -> Finnish
6 | fi_days = {
7 |     "Monday": "maanantai",
8 |     "Tuesday": "tiistai",
9 |     "Wednesday": "keskiviikko",
10 |     "Thursday": "torstai",
11 |     "Friday": "perjantai",
12 |     "Saturday": "lauantai",
13 |     "Sunday": "sunnuntai"
14 | }
15 | 
16 | # Maps English month names -> Finnish “month in the partitive case” for typical date usage
17 | fi_months = {
18 |     "January": "tammikuuta",
19 |     "February": "helmikuuta",
20 |     "March": "maaliskuuta",
21 |     "April": "huhtikuuta",
22 |     "May": "toukokuuta",
23 |     "June": "kesäkuuta",
24 |     "July": "heinäkuuta",
25 |     "August": "elokuuta",
26 |     "September": "syyskuuta",
27 |     "October": "lokakuuta",
28 |     "November": "marraskuuta",
29 |     "December": "joulukuuta"
30 | }
31 | 
32 | def get_ordinal_suffix(day_num: int) -> str:
33 |     """
34 |     Returns the English ordinal suffix for a given day of the month, e.g.
35 |     1 -> "st", 2 -> "nd", 3 -> "rd", 4 -> "th" (so 1 renders as "1st", 2 as "2nd", etc.).
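    11, 12 and 13 all take "th"; that special case is checked first below.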
36 | """ 37 | if 11 <= (day_num % 100) <= 13: 38 | return "th" 39 | elif day_num % 10 == 1: 40 | return "st" 41 | elif day_num % 10 == 2: 42 | return "nd" 43 | elif day_num % 10 == 3: 44 | return "rd" 45 | else: 46 | return "th" 47 | 48 | def get_english_timestamp_str(now_utc: datetime.datetime) -> str: 49 | """ 50 | Returns an English-formatted date/time string, e.g.: 51 | 'Monday, April 9th, 2025 | Time (UTC): 12:34:56' 52 | """ 53 | day_of_week_eng = now_utc.strftime("%A") # e.g. "Monday" 54 | month_name_eng = now_utc.strftime("%B") # e.g. "April" 55 | day_num = int(now_utc.strftime("%d")) 56 | year_str = now_utc.strftime("%Y") 57 | suffix = get_ordinal_suffix(day_num) 58 | date_str = f"{month_name_eng} {day_num}{suffix}, {year_str}" 59 | time_str = now_utc.strftime("%H:%M:%S") # "12:34:56" 60 | 61 | return f"{day_of_week_eng}, {date_str} | Time (UTC): {time_str}" 62 | 63 | def get_finnish_timestamp_str(now_utc: datetime.datetime) -> str: 64 | """ 65 | Returns a Finnish-formatted date/time string. For example: 66 | 'maanantai, 9. huhtikuuta 2025, klo 15:34:56 Suomen aikaa' 67 | 68 | (Adjust as you like for Finnish grammar.) 69 | """ 70 | helsinki_tz = pytz.timezone("Europe/Helsinki") 71 | now_fin = now_utc.astimezone(helsinki_tz) 72 | 73 | weekday_eng = now_fin.strftime("%A") # e.g. "Monday" 74 | day_of_week_fi = fi_days.get(weekday_eng, weekday_eng) 75 | 76 | month_eng = now_fin.strftime("%B") # e.g. "April" 77 | month_fi = fi_months.get(month_eng, month_eng) 78 | 79 | day_num = int(now_fin.strftime("%d")) # e.g. 9 80 | year_str = now_fin.strftime("%Y") # e.g. "2025" 81 | 82 | # For Finnish style we might do e.g. "9. huhtikuuta 2025" 83 | date_str_fi = f"{day_num}. {month_fi} {year_str}" 84 | 85 | time_str_fi = now_fin.strftime("%H:%M:%S") # "15:34:56" 86 | # For instance: "maanantai, 9. 
huhtikuuta 2025, klo 15:34:56 Suomen aikaa"
87 |     return f"{day_of_week_fi}, {date_str_fi}, klo {time_str_fi} Suomen aikaa"
88 | 
--------------------------------------------------------------------------------
/src/configmerger.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import re
3 | from config_paths import CONFIG_PATH
4 | 
5 | def update_config(main_config_file, custom_config_file):
6 |     # Read the custom configuration into a dictionary
7 |     custom_config = {}
8 |     with open(custom_config_file, 'r') as file:
9 |         for line in file:
10 |             if "=" in line and not line.startswith("#"):
11 |                 key, value = line.split('=', 1)
12 |                 custom_config[key.strip()] = value.strip()
13 | 
14 |     # Update the main configuration file
15 |     updated_lines = []
16 |     updated_keys = []
17 |     with open(main_config_file, 'r') as file:
18 |         for line in file:
19 |             if "=" in line and not line.startswith("#"):
20 |                 key = line.split('=', 1)[0].strip()
21 |                 if key in custom_config:
22 |                     line = f"{key} = {custom_config[key]}\n"
23 |                     updated_keys.append(key)
24 |             updated_lines.append(line)
25 | 
26 |     # Write the updated lines back to the main config file
27 |     with open(main_config_file, 'w') as file:
28 |         file.writelines(updated_lines)
29 | 
30 |     # Inform user about the updated keys
31 |     if updated_keys:
32 |         print("The following parameters have been updated:")
33 |         for key in updated_keys:
34 |             print(f"- {key}")
35 |     else:
36 |         print("No parameters were updated.")
37 | 
38 | if __name__ == "__main__":
39 |     if len(sys.argv) != 2:
40 |         print("Usage: configmerger.py <custom_config_file>")
41 |         sys.exit(1)
42 | 
43 |     main_config_file = CONFIG_PATH
44 |     custom_config_file = sys.argv[1]
45 | 
46 |     update_config(main_config_file, custom_config_file)
47 |     print(f"Configuration from {custom_config_file} has been merged into {main_config_file}.")
48 | 
49 | # ---
50 | # # // (old method)
51 | # import sys
52 | # import re
53 | 
54 | # def update_config(main_config_file, custom_config_file):
55 | #     # Read the custom configuration into a dictionary
56 | #     custom_config = {}
57 | #     with open(custom_config_file, 'r') as file:
58 | #         for line in file:
59 | #             if "=" in line and not line.startswith("#"):
60 | #                 key, value = line.split('=', 1)
61 | #                 custom_config[key.strip()] = value.strip()
62 | 
63 | #     # Update the main configuration file
64 | #     updated_lines = []
65 | #     with open(main_config_file, 'r') as file:
66 | #         for line in file:
67 | #             if "=" in line and not line.startswith("#"):
68 | #                 key = line.split('=', 1)[0].strip()
69 | #                 if key in custom_config:
70 | #                     line = f"{key} = {custom_config[key]}\n"
71 | #             updated_lines.append(line)
72 | 
73 | #     # Write the updated lines back to the main config file
74 | #     with open(main_config_file, 'w') as file:
75 | #         file.writelines(updated_lines)
76 | 
77 | # if __name__ == "__main__":
78 | #     if len(sys.argv) != 3:
79 | #         print("Usage: configmerger.py <main_config_file> <custom_config_file>")
80 | #         sys.exit(1)
81 | 
82 | #     main_config_file = sys.argv[1]
83 | #     custom_config_file = sys.argv[2]
84 | 
85 | #     update_config(main_config_file, custom_config_file)
86 | #     print(f"Configuration from {custom_config_file} has been merged into {main_config_file}.")
--------------------------------------------------------------------------------
/src/rag_elasticsearch/review_and_fix_entries.py:
--------------------------------------------------------------------------------
1 | # review_and_fix_entries.py
2 | 
3 | import json
4 | from elasticsearch import Elasticsearch, helpers
5 | 
6 | # Configuration
7 | es =
Elasticsearch(["http://localhost:9200"]) 8 | index_name = 'tg-bot-rag-index' 9 | problematic_answer = "<[get_defcon_status]>" 10 | backup_file = 'backup_before_correction.json' 11 | 12 | def fetch_problematic_entries(es, index_name, problematic_answer): 13 | query = { 14 | "query": { 15 | "match": { 16 | "answer": problematic_answer 17 | } 18 | }, 19 | "size": 10000 20 | } 21 | 22 | response = es.search(index=index_name, body=query, scroll='2m') 23 | scroll_id = response['_scroll_id'] 24 | hits = response['hits']['hits'] 25 | 26 | all_hits = [] 27 | all_hits.extend(hits) 28 | 29 | while len(hits) > 0: 30 | response = es.scroll(scroll_id=scroll_id, scroll='2m') 31 | scroll_id = response['_scroll_id'] 32 | hits = response['hits']['hits'] 33 | all_hits.extend(hits) 34 | 35 | return all_hits 36 | 37 | def save_backup(entries, backup_file): 38 | with open(backup_file, 'w', encoding='utf-8', errors='replace') as f: 39 | json.dump(entries, f, ensure_ascii=False, indent=4) 40 | print(f"Backup completed to {backup_file}") 41 | 42 | def review_and_fix_entries(entries): 43 | corrected_entries = [] 44 | index = 0 45 | 46 | while index < len(entries): 47 | entry = entries[index] 48 | print(f"\nQuestion: {entry['_source']['question']}") 49 | print(f"Answer: {entry['_source']['answer']}") 50 | action = input("Enter action (n = next, p = previous, s = skip, e = edit, d = delete): ").strip().lower() 51 | 52 | if action == 'e': 53 | new_answer = input("Enter the new answer: ").strip() 54 | entry['_source']['answer'] = new_answer 55 | corrected_entries.append(entry) 56 | print("Entry updated.") 57 | index += 1 58 | elif action == 's': 59 | index += 1 60 | elif action == 'd': 61 | confirm_delete = input("Are you sure you want to delete this entry? (y/n): ").strip().lower() 62 | if confirm_delete == 'y': 63 | entry['_source'] = None # Mark for deletion 64 | corrected_entries.append(entry) 65 | print("Entry marked for deletion.") 66 | index += 1 67 | elif action == 'p': 68 | if index > 0: 69 | index -= 1 70 | else: 71 | print("You are at the first entry.") 72 | elif action == 'n': 73 | index += 1 74 | else: 75 | print("Invalid action. Please use n, p, s, e, or d.") 76 | 77 | return corrected_entries 78 | 79 | def apply_corrections(es, index_name, entries): 80 | actions = [] 81 | for entry in entries: 82 | if entry['_source'] is None: 83 | actions.append({ 84 | "_op_type": "delete", 85 | "_index": index_name, 86 | "_id": entry['_id'] 87 | }) 88 | else: 89 | actions.append({ 90 | "_op_type": "index", 91 | "_index": index_name, 92 | "_id": entry['_id'], 93 | "_source": entry['_source'] 94 | }) 95 | 96 | helpers.bulk(es, actions) 97 | print(f"Applied corrections to {len(entries)} entries in '{index_name}'.") 98 | 99 | # Fetch problematic entries 100 | problematic_entries = fetch_problematic_entries(es, index_name, problematic_answer) 101 | 102 | # Save backup of problematic entries 103 | save_backup(problematic_entries, backup_file) 104 | 105 | # Review and fix entries 106 | corrected_entries = review_and_fix_entries(problematic_entries) 107 | 108 | # Apply corrections to Elasticsearch 109 | apply_corrections(es, index_name, corrected_entries) 110 | -------------------------------------------------------------------------------- /docker_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # horizontal line 4 | function hzline() { printf '%*s\n' "${COLUMNS:-$(tput cols)}" '' | tr ' ' - ; } 5 | 6 | # Check if Docker is installed 7 | if ! 
[ -x "$(command -v docker)" ]; then
8 |     echo 'Error: Docker is not installed. Please install Docker first: https://www.docker.com/get-started/' >&2
9 |     exit 1
10 | fi
11 | 
12 | # Welcome Message
13 | echo "" &&
14 | hzline &&
15 | echo "::: Welcome to the TelegramBot-OpenAI-API setup." &&
16 | echo "::: Source code & repo: https://github.com/FlyingFathead/TelegramBot-OpenAI-API/" &&
17 | hzline &&
18 | echo
19 | 
20 | # Check if .env file already exists and prompt the user
21 | if [ -f .env ]; then
22 |     echo "Warning: A .env file already exists in this directory."
23 |     while true; do
24 |         read -p "Do you want to overwrite the existing .env file? (y/n): " yn
25 |         case $yn in
26 |             [Yy]* ) break;;
27 |             [Nn]* ) echo "Exiting setup without overwriting .env file."; exit 0;;
28 |             * ) echo "Please answer yes or no.";;
29 |         esac
30 |     done
31 | fi
32 | 
33 | # Function to check for empty or invalid inputs for required keys
34 | validate_input() {
35 |     if [[ -z "$1" || ${#1} -lt 10 ]]; then
36 |         echo "Error: Input cannot be blank or too short (must be at least 10 characters). Please try again."
37 |         return 1
38 |     fi
39 |     return 0
40 | }
41 | 
42 | # Prompt for required API keys (OpenAI and Telegram)
43 | while true; do
44 |     read -p "Please enter your OpenAI API key (required): " OPENAI_API_KEY
45 |     validate_input "$OPENAI_API_KEY" && break
46 | done
47 | 
48 | while true; do
49 |     read -p "Please enter your Telegram Bot API Token (required): " TELEGRAM_BOT_TOKEN
50 |     validate_input "$TELEGRAM_BOT_TOKEN" && break
51 | done
52 | 
53 | # Prompt for optional API keys (user can leave them blank)
54 | hzline &&
55 | echo "::: Below are optional keys for the bot's supported API functions." &&
56 | echo "::: They're not required for basic functionality, but are a great enhancement." &&
57 | echo "::: If you don't have an API key right now, just press ENTER to leave them blank." &&
58 | hzline &&
59 | read -p "Please enter your Perplexity API key (optional): " PERPLEXITY_API_KEY
60 | read -p "Please enter your OpenWeatherMap API key (optional): " OPENWEATHERMAP_API_KEY
61 | read -p "Please enter your WeatherAPI key (optional): " WEATHERAPI_KEY
62 | read -p "Please enter your MapTiler API key (optional): " MAPTILER_API_KEY
63 | read -p "Please enter your Openrouteservice API key (optional): " OPENROUTESERVICE_API_KEY
64 | 
65 | # Create a .env file with the required and optional keys
66 | hzline &&
67 | echo "Generating .env file..."
68 | cat <<EOL > .env
69 | OPENAI_API_KEY=$OPENAI_API_KEY
70 | TELEGRAM_BOT_TOKEN=$TELEGRAM_BOT_TOKEN
71 | OPENWEATHERMAP_API_KEY=$OPENWEATHERMAP_API_KEY
72 | WEATHERAPI_KEY=$WEATHERAPI_KEY
73 | MAPTILER_API_KEY=$MAPTILER_API_KEY
74 | OPENROUTESERVICE_API_KEY=$OPENROUTESERVICE_API_KEY
75 | PERPLEXITY_API_KEY=$PERPLEXITY_API_KEY
76 | # Additional variables can be added here
77 | EOL
78 | 
79 | echo "Environment variables saved to .env." &&
80 | hzline &&
81 | 
82 | # Instructions for the next steps
83 | echo
84 | echo "Next Steps:"
85 | echo "1. Build the Docker image by running the following command:"
86 | echo "   sudo docker build -t telegrambot-openai-api ."
87 | echo
88 | echo "2. After building the image, start the bot container using:"
89 | echo "   sudo docker run --env-file .env --name telegrambot-openai-api -d telegrambot-openai-api"
90 | echo
91 | echo "3. Check the container status with:"
92 | echo "   sudo docker ps"
93 | echo
94 | echo "4. Check the logs with:"
95 | echo "   sudo docker logs telegrambot-openai-api"
96 | echo
97 | echo "5. Stop the container with:"
98 | echo "   sudo docker stop <container_name>"
99 | echo
100 | echo "After that, you're all set! Enjoy, and don't forget to star the repository if you like it. :-)"
101 | hzline &&
102 | echo ""
103 | 
104 | # optional build & run function
105 | function build_and_run() {
106 |     # Build Docker image
107 |     sudo docker build -t telegrambot-openai-api .
108 |     if [[ $? -ne 0 ]]; then
109 |         echo "Error: Docker image build failed."
110 |         exit 1
111 |     fi
112 | 
113 |     # Run Docker container
114 |     sudo docker run --env-file .env -d telegrambot-openai-api
115 |     if [[ $? -ne 0 ]]; then
116 |         echo "Error: Failed to run the Docker container."
117 |         exit 1
118 |     fi
119 | }
120 | 
121 | # build_and_run
--------------------------------------------------------------------------------
/src/perplexity_handler.py:
--------------------------------------------------------------------------------
1 | # ATTN: this module is currently not in use as of v0.737.
2 | 
3 | # # perplexity_handler.py
4 | 
5 | # import logging
6 | # import json
7 | # from telegram.constants import ParseMode
8 | # from api_perplexity_search import query_perplexity, translate_response_chunked, split_message
9 | 
10 | # MAX_TELEGRAM_MESSAGE_LENGTH = 4000
11 | 
12 | # async def handle_query_perplexity(context, update, chat_id, function_call, user_message, bot, chat_history):
13 | #     arguments = json.loads(function_call.get('arguments', '{}'))
14 | #     question = arguments.get('question', '')
15 | 
16 | #     if not question:
17 | #         logging.warning("No question was provided for the Perplexity query.")
18 | #         await context.bot.send_message(
19 | #             chat_id=chat_id,
20 | #             text="No question was provided for the Perplexity query. Please provide a question.",
21 | #             parse_mode=ParseMode.HTML
22 | #         )
23 | #         return True
24 | 
25 | #     # Make the asynchronous API call to query Perplexity
26 | #     perplexity_response = await query_perplexity(context.bot, chat_id, question)
27 | 
28 | #     # Log the raw Perplexity API response for debugging
29 | #     logging.info(f"Raw Perplexity API Response: {perplexity_response}")
30 | 
31 | #     if perplexity_response == "[System message: Perplexity API is currently unavailable due to server issues. Inform the user about this issue in their language.]":
32 | #         # Handle the system message for API unavailability
33 | #         logging.error("Perplexity API is down. Informing the model to notify the user.")
34 | #         await context.bot.send_message(
35 | #             chat_id=chat_id,
36 | #             text="Perplexity API is currently unavailable due to server issues.
Please try again later.", 37 | # parse_mode=ParseMode.HTML 38 | # ) 39 | # return True 40 | 41 | # if perplexity_response is None: 42 | # logging.error("No valid response from Perplexity, Perplexity response was None or empty.") 43 | # await context.bot.send_message( 44 | # chat_id=chat_id, 45 | # text="No valid response from Perplexity, Perplexity response was None or empty.", 46 | # parse_mode=ParseMode.HTML 47 | # ) 48 | # return True 49 | 50 | # # Flag for translation in progress 51 | # context.user_data['active_translation'] = True 52 | 53 | # # Translate or process the response as necessary 54 | # bot_reply_formatted = await translate_response_chunked(bot, user_message, perplexity_response, context, update) 55 | 56 | # # After translation or processing is completed, clear the active translation flag 57 | # context.user_data.pop('active_translation', None) 58 | 59 | # if isinstance(bot_reply_formatted, bool) and bot_reply_formatted: # Check if translation function returned successfully 60 | # return True # Ensure function exits after handling success 61 | 62 | # if not bot_reply_formatted or bot_reply_formatted.startswith("Error"): 63 | # logging.error("Error processing or translating the Perplexity response.") 64 | # await context.bot.send_message( 65 | # chat_id=chat_id, 66 | # text="Error processing or translating the Perplexity response.", 67 | # parse_mode=ParseMode.HTML 68 | # ) 69 | # return True 70 | 71 | # # Append the bot's reply to the chat history before sending it 72 | # chat_history.append({"role": "assistant", "content": bot_reply_formatted}) 73 | # context.chat_data['chat_history'] = chat_history # Update the chat data with the new history 74 | 75 | # if len(bot_reply_formatted) > MAX_TELEGRAM_MESSAGE_LENGTH: 76 | # # Split the message into chunks if it exceeds the maximum length 77 | # chunks = split_message(bot_reply_formatted) 78 | 79 | # for chunk in chunks: 80 | # await context.bot.send_message( 81 | # chat_id=chat_id, 82 | # text=chunk, 83 | # parse_mode=ParseMode.HTML 84 | # ) 85 | # logging.info(f"Sent chunk with length: {len(chunk)}") 86 | # else: 87 | # await context.bot.send_message( 88 | # chat_id=chat_id, 89 | # text=bot_reply_formatted, 90 | # parse_mode=ParseMode.HTML 91 | # ) 92 | # logging.info(f"Sent message with length: {len(bot_reply_formatted)}") 93 | 94 | # logging.info("Response sent successfully, no further actions should be triggered.") 95 | # return True 96 | -------------------------------------------------------------------------------- /src/api_get_maptiler.py: -------------------------------------------------------------------------------- 1 | # api_get_maptiler.py 2 | 3 | import logging 4 | import httpx 5 | import os 6 | 7 | # the function below can be implemented to use for POI lookups 8 | async def get_location_from_coordinates(latitude, longitude): 9 | logging.info(f"Fetching location information for coordinates: Latitude: {latitude}, Longitude: {longitude}") 10 | # Retrieve MapTiler API key from environment variables 11 | api_key = os.getenv('MAPTILER_API_KEY') 12 | if not api_key: 13 | logging.info("[WARNING] MapTiler API key not set. You need to set the 'MAPTILER_API_KEY' environment variable for this function to work!") 14 | return "MapTiler API key not set." 
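    # (Heads-up: MapTiler's geocoding endpoint expects lon,lat order, which is
    # why the URL below interpolates {longitude} before {latitude}.)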
15 | 16 | # Construct the API request URL for reverse geocoding 17 | reverse_geocode_url = f"https://api.maptiler.com/geocoding/{longitude},{latitude}.json?key={api_key}" 18 | logging.info(f"Making API request to URL: {reverse_geocode_url}") 19 | 20 | async with httpx.AsyncClient() as client: 21 | response = await client.get(reverse_geocode_url) 22 | logging.info(f"Received response with status code: {response.status_code}") 23 | 24 | if response.status_code == 200: 25 | data = response.json() 26 | logging.info(f"Response data: {data}") 27 | # Process the response data to extract useful information 28 | # For example, you might extract the nearest city name, points of interest, etc. 29 | # Return this information 30 | return data 31 | else: 32 | logging.info(f"Failed to fetch location information: {response.text}") 33 | return "Failed to fetch location information." 34 | 35 | # this function can look up coordinates from a given address 36 | async def get_coordinates_from_address(address): 37 | logging.info(f"Fetching coordinates for address: {address}") 38 | # Retrieve MapTiler API key from environment variables 39 | api_key = os.getenv('MAPTILER_API_KEY') 40 | if not api_key: 41 | logging.error("[ERROR] MapTiler API key not set. You need to set the 'MAPTILER_API_KEY' environment variable for this function to work!") 42 | return "MapTiler API key not set." 43 | 44 | # Construct the API request URL for geocoding 45 | geocode_url = f"https://api.maptiler.com/geocoding/{address}.json?key={api_key}" 46 | logging.info(f"Making API request to URL: {geocode_url}") 47 | 48 | async with httpx.AsyncClient() as client: 49 | response = await client.get(geocode_url) 50 | logging.info(f"Received response with status code: {response.status_code}") 51 | 52 | if response.status_code == 200: 53 | data = response.json() 54 | logging.info(f"Response data: {data}") 55 | # Assuming the first feature is the most relevant match 56 | if data['features']: 57 | first_feature = data['features'][0] 58 | coordinates = first_feature['geometry']['coordinates'] 59 | # Coordinates are returned as [longitude, latitude] 60 | return {'longitude': coordinates[0], 'latitude': coordinates[1]} 61 | else: 62 | logging.info("No features found for the provided address.") 63 | return "No location found for the provided address." 64 | else: 65 | logging.error(f"Failed to fetch coordinates: {response.text}") 66 | return "Failed to fetch coordinates." 67 | 68 | # get a map image (for maptiler's paid plan only) 69 | async def get_static_map_image(latitude, longitude, zoom, width, height, mapId='streets'): 70 | api_key = os.getenv('MAPTILER_API_KEY') 71 | if not api_key: 72 | logging.error("[ERROR] MapTiler API key not set.") 73 | return "MapTiler API key not set." 
74 | 75 | scale = '@2x' # For HiDPI/Retina maps 76 | format = 'png' # Output format 77 | url = f"https://api.maptiler.com/maps/{mapId}/static/{longitude},{latitude},{zoom}/{width}x{height}{scale}.{format}?key={api_key}" 78 | 79 | async with httpx.AsyncClient() as client: 80 | response = await client.get(url) 81 | if response.status_code == 200: 82 | # Save the image to a file for debugging 83 | with open('map_image.png', 'wb') as f: 84 | f.write(response.content) 85 | logging.info("Static map image saved successfully.") 86 | return response.content # Returns the image data 87 | else: 88 | logging.error(f"Failed to generate static map: Status code {response.status_code}") 89 | return None 90 | -------------------------------------------------------------------------------- /src/api_key.py: -------------------------------------------------------------------------------- 1 | # api_key.py 2 | # Read the OPENAI API key with configurable fallback 3 | 4 | import os 5 | import sys 6 | import configparser 7 | import logging 8 | from config_paths import CONFIG_PATH, API_TOKEN_PATH # Import the centralized CONFIG_PATH 9 | 10 | # Set up basic logging 11 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 12 | 13 | # Flag to enable or disable fallback to environment variable if the key is not found in the file 14 | ENABLE_KEY_READING_FALLBACK = True 15 | 16 | def read_env_api_key(): 17 | """ 18 | Reads the OpenAI API key from the environment variable. 19 | 20 | Returns: 21 | str: The API key if found, else None. 22 | """ 23 | api_key = os.getenv('OPENAI_API_KEY') 24 | if api_key: 25 | logging.info("OpenAI API key loaded from environment variable.") 26 | return api_key 27 | 28 | def get_api_key(config_path=CONFIG_PATH, token_file=API_TOKEN_PATH): 29 | """ 30 | Retrieves the OpenAI API key, prioritizing the method as per the config file or defaults. 31 | 32 | Args: 33 | config_path (str): Path to the configuration file. 34 | token_file (str): Path to the file containing the API key. 35 | 36 | Returns: 37 | str: The OpenAI API key. 38 | 39 | Raises: 40 | SystemExit: If the API key is not found through any method. 41 | """ 42 | config = configparser.ConfigParser() 43 | api_key = None 44 | 45 | try: 46 | config.read(config_path) 47 | if not config.sections(): 48 | logging.warning(f"Config file '{config_path}' is missing or empty. 
OpenAI API key reading falling back to environment variable preference.") 49 | prefer_env = True # Defaulting to True if config read fails 50 | else: 51 | prefer_env = config.getboolean('DEFAULT', 'PreferEnvForAPIKey', fallback=True) 52 | logging.info(f"Preference for environment variables for the OpenAI API key set in config: {'Yes' if prefer_env else 'No'}") 53 | except Exception as e: 54 | logging.error(f"Failed to read OpenAI API key from config file: {e}") 55 | prefer_env = True # Defaulting to True if config read fails 56 | logging.info("Defaulting to environment variable preference due to config read failure.") 57 | 58 | if prefer_env: 59 | api_key = read_env_api_key() 60 | if api_key: 61 | return api_key.strip() 62 | 63 | if not api_key: 64 | try: 65 | with open(token_file, 'r') as file: 66 | api_key = file.read().strip() 67 | if api_key: 68 | logging.info("OpenAI API key loaded from file.") 69 | return api_key 70 | except FileNotFoundError: 71 | logging.warning("OpenAI API token file not found.") 72 | if not prefer_env and ENABLE_KEY_READING_FALLBACK: 73 | api_key = read_env_api_key() 74 | if api_key: 75 | return api_key.strip() 76 | 77 | if not api_key: 78 | logging.error("The OPENAI_API_KEY environment variable is not set, and `api_token.txt` was not found. Please set either one and adjust `config.ini` if needed for the preferred load order.") 79 | sys.exit(1) 80 | 81 | # Example usage for standalone testing 82 | if __name__ == "__main__": 83 | api_key = get_api_key() 84 | print("OpenAI API Key (for testing & debugging only):", api_key) 85 | 86 | # ~~~ old method below ~~~ 87 | # import os 88 | # import sys 89 | # import configparser 90 | 91 | # # set `prefer_env` to `True` if you wish to prioritize the environment variable over the configuration text file 92 | # # (determines load order) 93 | # def get_api_key(): 94 | # config = configparser.ConfigParser() 95 | # config.read('config.ini') 96 | # prefer_env = config.getboolean('DEFAULT', 'PreferEnvForAPIKey', fallback=True) 97 | 98 | # if prefer_env: 99 | # api_key = os.getenv('OPENAI_API_KEY') 100 | # if api_key is not None: 101 | # return api_key 102 | 103 | # try: 104 | # with open('api_token.txt', 'r') as file: 105 | # return file.read().strip() 106 | # except FileNotFoundError: 107 | # if not prefer_env: 108 | # api_key = os.getenv('OPENAI_API_KEY') 109 | # if api_key is not None: 110 | # return api_key 111 | 112 | # print("The OPENAI_API_KEY environment variable is not set, and `api_token.txt` was not found. Please set either one and adjust `config.ini` if needed for the preferred load order.") 113 | # sys.exit(1) 114 | -------------------------------------------------------------------------------- /src/calc_module.py: -------------------------------------------------------------------------------- 1 | # calc_module.py 2 | # 3 | # From: 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | # https://github.com/FlyingFathead/TelegramBot-OpenAI-API 6 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 7 | # (updated Oct 13, 2024) 8 | 9 | import ast 10 | import operator 11 | import logging 12 | import re 13 | 14 | # Initialize the logger 15 | logger = logging.getLogger(__name__) 16 | 17 | # Below are some safety measures so that the outputs aren't absolutely insane in length. 
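# (For example, evaluating 9**9**9 would try to materialize an integer with
#  roughly 370 million digits; without these caps a single chat request could
#  stall the bot before the result is even rejected.)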
18 | # Define maximum allowed length for the result and maximum magnitude 19 | MAX_OUTPUT_LENGTH = 500 # Adjust as necessary 20 | MAX_MAGNITUDE = 1e100 # Example maximum magnitude 21 | 22 | def preprocess_expression(expression: str) -> str: 23 | """ 24 | Preprocess the input expression to handle natural language constructs like 'of' and percentages. 25 | For example, convert '0.1% of 200000000' to '0.1 / 100 * 200000000'. 26 | """ 27 | # Handle 'of' by replacing it with '*' 28 | expression = re.sub(r'\bof\b', '*', expression, flags=re.IGNORECASE) 29 | 30 | # Handle percentages: convert 'X%' to '(X/100)' 31 | expression = re.sub(r'(\d+(\.\d+)?)\s*%', r'(\1/100)', expression) 32 | 33 | logger.debug(f"Preprocessed expression: {expression}") 34 | return expression 35 | 36 | def safe_eval(expression: str): 37 | # Replace '^' with '**' for exponentiation 38 | expression = expression.replace('^', '**') 39 | 40 | allowed_operators = { 41 | ast.Add: operator.add, 42 | ast.Sub: operator.sub, 43 | ast.Mult: operator.mul, 44 | ast.Div: operator.truediv, 45 | ast.Mod: operator.mod, 46 | ast.Pow: operator.pow 47 | } 48 | 49 | def _eval(node): 50 | if isinstance(node, ast.BinOp): 51 | if type(node.op) in allowed_operators: 52 | left = _eval(node.left) 53 | right = _eval(node.right) 54 | op_func = allowed_operators[type(node.op)] 55 | result = op_func(left, right) 56 | 57 | # Logging the operation being performed 58 | logger.debug(f"Evaluating: {left} {type(node.op).__name__} {right} = {result}") 59 | 60 | # Check if the result is within acceptable magnitude 61 | if abs(result) > MAX_MAGNITUDE: 62 | error_msg = f"Result magnitude exceeds the maximum allowed limit: {result}" 63 | logger.error(error_msg) 64 | raise ValueError(error_msg) 65 | 66 | return result 67 | else: 68 | error_msg = f"Unsupported operation: {type(node.op).__name__}" 69 | logger.error(error_msg) 70 | raise ValueError(error_msg) 71 | elif isinstance(node, ast.Num): 72 | logger.debug(f"Numeric literal: {node.n}") 73 | return node.n 74 | elif isinstance(node, ast.Expression): 75 | return _eval(node.body) 76 | elif isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)): 77 | operand = _eval(node.operand) 78 | if isinstance(node.op, ast.UAdd): 79 | return +operand 80 | elif isinstance(node.op, ast.USub): 81 | return -operand 82 | else: 83 | error_msg = f"Unsupported type: {type(node).__name__}" 84 | logger.error(error_msg) 85 | raise ValueError(error_msg) 86 | 87 | try: 88 | node = ast.parse(expression, mode='eval') 89 | logger.info(f"Parsed expression: {expression}") 90 | return _eval(node.body) 91 | except Exception as e: 92 | logger.exception(f"Error parsing or evaluating expression: {expression}") 93 | raise 94 | 95 | async def calculate_expression(expression: str): 96 | logger.info(f"Calculating expression: {expression}") 97 | try: 98 | # Preprocess the expression to handle 'of' and '%' 99 | processed_expression = preprocess_expression(expression) 100 | 101 | result = safe_eval(processed_expression) 102 | 103 | # Check if the result length is within limits 104 | result_str = str(result) 105 | if len(result_str) > MAX_OUTPUT_LENGTH: 106 | error_message = f"Result exceeds the maximum allowed length of {MAX_OUTPUT_LENGTH} characters." 107 | logger.error(error_message) 108 | return error_message 109 | 110 | # Construct the success message 111 | result_message = f"The result of {expression} is {result}." 
112 | logger.info(f"Calculation successful: {result_message}") 113 | return result_message 114 | except ValueError as ve: 115 | # Specific handling for ValueError (e.g., unsupported operations) 116 | error_message = f"Error evaluating expression `{expression}`: {str(ve)}" 117 | logger.error(error_message) 118 | return error_message 119 | except Exception as e: 120 | # General error handling 121 | error_message = f"An unexpected error occurred while evaluating `{expression}`: {str(e)}" 122 | logger.error(error_message) 123 | return error_message 124 | -------------------------------------------------------------------------------- /src/api_get_website_dump.py: -------------------------------------------------------------------------------- 1 | # api_get_website_dump.py 2 | 3 | import urllib.parse 4 | import subprocess 5 | import logging 6 | import tiktoken # for token counting 7 | import sys 8 | import asyncio 9 | import re 10 | 11 | # Configuration 12 | USE_DOMAIN_RESTRICTIONS = False # Flag to enable or disable domain restriction logic 13 | ALLOW_ONLY = True # If True, only allowed domains are permitted. If False, only disallowed domains are blocked. 14 | 15 | ALLOWED_DOMAINS = [ 16 | '*.fi', # Allow all .fi domains 17 | 'google.com', # Allow google.com and all subdomains 18 | 'openai.com', # Allow openai.com and all subdomains 19 | ] 20 | 21 | DISALLOWED_DOMAINS = [ 22 | # Add specific domains or patterns you want to disallow, if any 23 | ] 24 | 25 | # check if the domain is allowed or not 26 | def is_domain_allowed(url): 27 | if not USE_DOMAIN_RESTRICTIONS: 28 | logging.warning("Domain restrictions are NOT in use. All domains are allowed.") 29 | return True # If restrictions are not used, allow all domains 30 | 31 | parsed_url = urllib.parse.urlparse(url) 32 | domain = parsed_url.netloc 33 | 34 | if ALLOW_ONLY: 35 | # In "allow only" mode, allow only domains in ALLOWED_DOMAINS 36 | for allowed in ALLOWED_DOMAINS: 37 | if re.fullmatch(allowed.replace('*', '.*'), domain): 38 | return True 39 | logging.warning(f"Domain not allowed: {domain}") 40 | return False 41 | else: 42 | # In "disallow only" mode, disallow only domains in DISALLOWED_DOMAINS 43 | for disallowed in DISALLOWED_DOMAINS: 44 | if re.fullmatch(disallowed.replace('*', '.*'), domain): 45 | logging.warning(f"Disallowed domain: {domain}") 46 | return False 47 | return True # Allow all other domains if not disallowed 48 | 49 | # get the website dump 50 | async def get_website_dump(url, max_tokens=10000): 51 | """ 52 | Fetches the content of a website using lynx --dump and returns it as a string. 53 | Ensures the content doesn't exceed the specified max token count. 54 | Cleans up unnecessary content and retains meaningful newlines. 
55 | """ 56 | 57 | # Check if the domain is allowed 58 | if not is_domain_allowed(url): 59 | error_message = f"Error: Cannot browse the address, not allowed for URL: {url}" 60 | logging.error(error_message) 61 | return error_message 62 | 63 | try: 64 | # Execute the lynx command to fetch the website content 65 | result = subprocess.run(['lynx', '--dump', url], capture_output=True, text=True, timeout=15) 66 | 67 | # Check if the command was successful 68 | if result.returncode == 0: 69 | content = result.stdout 70 | 71 | # Filter out non-informative content using regex 72 | # content = re.sub(r'\[.*?\]|\(BUTTON\)|\s{2,}', ' ', content) # Remove links, buttons, and excessive spaces 73 | 74 | # Replace multiple spaces and tabs with a single space 75 | content = re.sub(r'\s+', ' ', content) 76 | 77 | # Keep meaningful newlines (keep single newlines, avoid empty lines) 78 | content = re.sub(r'\s*\n\s*', '\n', content) # Clean up newlines 79 | content = re.sub(r'\n{2,}', '\n', content) # Ensure no multiple consecutive newlines 80 | 81 | # Use the correct encoding for GPT-4o 82 | enc = tiktoken.encoding_for_model("gpt-4o") # Load the appropriate tokenizer for GPT-4o 83 | tokens = enc.encode(content) 84 | 85 | # Log the fetched content and token count 86 | logging.info(f"Upon user's request, fetched content from: {url}") 87 | logging.info(f"Token count: {len(tokens)}") 88 | 89 | # If the token count exceeds the max_tokens, truncate the content 90 | if len(tokens) > max_tokens: 91 | # Trim tokens to fit within the max_tokens 92 | tokens = tokens[:max_tokens] 93 | # Decode the trimmed tokens back to text 94 | content = enc.decode(tokens) 95 | logging.info(f"Content truncated to {max_tokens} tokens.") 96 | 97 | return content.strip() 98 | else: 99 | error_message = f"Error: Unable to fetch content from {url}. Return code: {result.returncode}" 100 | logging.error(error_message) 101 | return error_message 102 | 103 | except subprocess.TimeoutExpired: 104 | error_message = f"Error: Timed out while trying to fetch content from {url}." 
105 |         logging.error(error_message)
106 |         return error_message
107 | 
108 |     except Exception as e:
109 |         error_message = f"Error: An exception occurred while fetching content from {url}: {str(e)}"
110 |         logging.error(error_message)
111 |         return error_message
112 | 
113 | # Tester to run the script directly
114 | if __name__ == "__main__":
115 |     if len(sys.argv) != 2:
116 |         print("Usage: python api_get_website_dump.py <url>")
117 |         sys.exit(1)
118 | 
119 |     url = sys.argv[1]
120 | 
121 |     # Set up basic logging to console
122 |     logging.basicConfig(level=logging.INFO)
123 | 
124 |     # Run the function and print the result
125 |     result = asyncio.run(get_website_dump(url))
126 |     print(result)
127 | 
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | # utils.py
2 | import os
3 | import re
4 | import shutil
5 | import sys
6 | import datetime
7 | from functools import partial
8 | import asyncio
9 | from concurrent.futures import ThreadPoolExecutor
10 | from pydub import AudioSegment
11 | import json
12 | import httpx
13 | import openai
14 | 
15 | # Elasticsearch checks
16 | from config_paths import (
17 |     ELASTICSEARCH_ENABLED, ELASTICSEARCH_HOST, ELASTICSEARCH_PORT,
18 |     ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD
19 | )
20 | 
21 | # Finnish translations of holiday names
22 | holiday_replacements = {
23 |     "New Year's Day": "uudenvuodenpäivä (New Year's Day)",
24 |     "Epiphany": "loppiainen (Epiphany)",
25 |     "Good Friday": "pitkäperjantai (Good Friday)",
26 |     "Easter Sunday": "pääsiäispäivä (Easter Sunday)",
27 |     "Easter Monday": "2. pääsiäispäivä (Easter Monday)",
28 |     "May Day": "vappu (May Day)",
29 |     "Ascension Day": "helatorstai (Ascension Day)",
30 |     "Whit Sunday": "helluntaipäivä (Whit Sunday)",
31 |     "Midsummer Eve": "juhannusaatto (Midsummer Eve)",
32 |     "Midsummer Day": "juhannuspäivä (Midsummer Day)",
33 |     "All Saints' Day": "pyhäinpäivä (All Saints' Day)",
34 |     "Independence Day": "itsenäisyyspäivä (Independence Day)",
35 |     "Christmas Eve": "jouluaatto (Christmas Eve)",
36 |     "Christmas Day": "joulupäivä (Christmas Day)",
37 |     "Second Day of Christmas": "Tapaninpäivä (Second Day of Christmas)",
38 |     "New Year's Eve": "uudenvuodenaatto (New Year's Eve)",
39 |     "May Day Eve": "vappuaatto (May Day Eve)"
40 | }
41 | 
42 | # set `now`
43 | now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
44 | 
45 | # print term width horizontal line
46 | def hz_line(character='-'):
47 |     terminal_width = shutil.get_terminal_size().columns
48 |     line = character * terminal_width
49 |     print(line)
50 |     sys.stdout.flush()  # Flush the output to the terminal immediately
51 | 
52 | # print the startup message
53 | def print_startup_message(version_number):
54 |     now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
55 |     hz_line()
56 |     print(f"::: [{now}] Telegram bot (powered by ChatKeke) v.{version_number} starting up...", flush=True)
57 |     # Print Elasticsearch status
58 |     print(f"::: Elasticsearch enabled: {ELASTICSEARCH_ENABLED}", flush=True)
59 |     hz_line()
60 | 
61 | # remove html tags
62 | def remove_html_tags(text):
63 |     """Remove html tags from a string"""
64 |     clean = re.compile('<.*?>')
65 |     return re.sub(clean, '', text)
66 | 
67 | # escape markdown v2, v0.12 [currently not in use because this is a ... 
it's a thing] 68 | def escape_markdown_v2(text): 69 | 70 | # Escape MarkdownV2 special characters 71 | def escape_special_chars(m): 72 | char = m.group(0) 73 | # Escape all special characters with a backslash, except for asterisks and underscores 74 | if char in ('_', '*', '`'): 75 | # These are used for formatting and shouldn't be escaped. 76 | return char 77 | return '\\' + char 78 | 79 | # First, we'll handle the code blocks by temporarily removing them 80 | code_blocks = re.findall(r'```.*?```', text, re.DOTALL) 81 | code_placeholders = [f"CODEBLOCK{i}" for i in range(len(code_blocks))] 82 | for placeholder, block in zip(code_placeholders, code_blocks): 83 | text = text.replace(block, placeholder) 84 | 85 | # Now we escape the special characters outside of the code blocks 86 | text = re.sub(r'([[\]()~>#+\-=|{}.!])', escape_special_chars, text) 87 | 88 | # We convert **bold** and *italic* (or _italic_) syntax to Telegram's MarkdownV2 syntax 89 | # Bold: **text** to *text* 90 | text = re.sub(r'\*\*(.+?)\*\*', r'*\1*', text) 91 | # Italic: *text* or _text_ to _text_ (if not part of a code block) 92 | text = re.sub(r'\b_(.+?)_\b', r'_\1_', text) 93 | text = re.sub(r'\*(.+?)\*', r'_\1_', text) 94 | 95 | # Restore the code blocks 96 | for placeholder, block in zip(code_placeholders, code_blocks): 97 | text = text.replace(placeholder, block) 98 | 99 | return text 100 | 101 | # Calculate the total size of files in the specified directory. 102 | def get_directory_size(path: str) -> int: 103 | total_size = 0 104 | for dirpath, dirnames, filenames in os.walk(path): 105 | for f in filenames: 106 | fp = os.path.join(dirpath, f) 107 | total_size += os.path.getsize(fp) 108 | return total_size 109 | 110 | # Cleanup the oldest files in the specified directory when storage limit is exceeded. 111 | def cleanup_data_directory(path: str, max_storage_mb: int): 112 | files = [os.path.join(path, f) for f in os.listdir(path)] 113 | files.sort(key=lambda x: os.path.getmtime(x)) 114 | 115 | while get_directory_size(path) >= max_storage_mb * 1024 * 1024 and files: 116 | os.remove(files.pop(0)) # Remove the oldest file 117 | 118 | # examine an audio file's length (for WhisperAPI transcriptions) 119 | # ~ 120 | # This function doesn't inherently need to be async, as pydub's processing is synchronous. 121 | # However, if you're performing asynchronous file I/O or need to integrate with other async code, it can be async. 
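# A minimal standalone sketch of calling it (assumes an .ogg file exists at the
# given path; pydub itself needs ffmpeg available on the system):
#
#   minutes = asyncio.run(get_voice_message_duration("data/sample.ogg"))
#   print(f"Voice message length: {minutes:.2f} min")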
122 | # when in async mode 123 | executor = ThreadPoolExecutor(10) # Adjust the number of workers based on your needs 124 | # the function 125 | async def get_voice_message_duration(voice_file_path): 126 | loop = asyncio.get_running_loop() 127 | audio = await loop.run_in_executor(executor, AudioSegment.from_file, voice_file_path) 128 | duration_seconds = len(audio) / 1000 129 | duration_minutes = duration_seconds / 60 130 | return duration_minutes 131 | -------------------------------------------------------------------------------- /src/reminder_poller.py: -------------------------------------------------------------------------------- 1 | # src/reminder_poller.py 2 | 3 | import asyncio 4 | import logging 5 | import configparser 6 | from datetime import datetime, timezone # Import timezone 7 | 8 | # --- Corrected Imports --- 9 | from config_paths import CONFIG_PATH, REMINDERS_DB_PATH 10 | import db_utils 11 | from telegram.ext import Application 12 | from telegram.error import Forbidden, BadRequest 13 | from telegram.constants import ParseMode 14 | 15 | # load and use logger 16 | logger = logging.getLogger(__name__) 17 | logger.setLevel(logging.INFO) 18 | 19 | # Load configuration 20 | config = configparser.ConfigParser() 21 | config.read(CONFIG_PATH) 22 | 23 | # Read configuration safely 24 | try: 25 | POLLING_INTERVAL = config.getint('Reminders', 'PollingIntervalSeconds', fallback=60) # Default to 60s 26 | REMINDERS_ENABLED = config.getboolean('Reminders', 'EnableReminders', fallback=False) 27 | except configparser.NoSectionError: 28 | logger.warning("[Reminders] section missing in config.ini, using defaults (Polling=60s, Enabled=False)") 29 | POLLING_INTERVAL = 60 30 | REMINDERS_ENABLED = False 31 | except ValueError: 32 | logger.error("Invalid non-integer value for PollingIntervalSeconds in config.ini. Using default 60s.") 33 | POLLING_INTERVAL = 60 34 | REMINDERS_ENABLED = config.getboolean('Reminders', 'EnableReminders', fallback=False) # Still try to read enable flag 35 | 36 | # split to fit to telegram's msg length 37 | MAX_TG_MSG_LENGTH = 4096 38 | 39 | def split_long_message(message, max_length=MAX_TG_MSG_LENGTH): 40 | """ 41 | Splits a message into multiple parts, each up to max_length characters, 42 | and returns a list of parts. 43 | """ 44 | parts = [] 45 | start_index = 0 46 | while start_index < len(message): 47 | # Slice out a chunk of up to 'max_length' characters 48 | part = message[start_index:start_index + max_length] 49 | parts.append(part) 50 | start_index += max_length 51 | return parts 52 | 53 | # --- Corrected Function Signature --- 54 | async def reminder_poller(application: Application): 55 | """Periodically checks for due reminders and sends notifications.""" 56 | 57 | # Check if the feature is enabled right at the start 58 | if not REMINDERS_ENABLED: 59 | logger.info("Reminder Poller exiting: Feature disabled in config.ini.") 60 | return # Stop the poller task if disabled 61 | 62 | # Check if the database was initialized successfully 63 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 64 | logger.error("Reminder Poller exiting: DB was not initialized successfully.") 65 | return 66 | 67 | logger.info(f"Reminder poller started. 
Checking every {POLLING_INTERVAL} seconds.") 68 | 69 | while True: 70 | try: 71 | # --- Get Current Time --- 72 | now_utc_str = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') 73 | 74 | # --- Fetch due reminders using the correct DB path and time --- 75 | due_reminders = db_utils.get_due_reminders(REMINDERS_DB_PATH, now_utc_str) 76 | 77 | if due_reminders: 78 | logger.info(f"Found {len(due_reminders)} due reminders.") 79 | for r in due_reminders: 80 | reminder_id = r['reminder_id'] 81 | user_id = r['user_id'] 82 | chat_id = r['chat_id'] 83 | raw_text = r['reminder_text'] 84 | 85 | # The text you'd like to send (with an optional emoji, etc.) 86 | msg = f"🔔 {raw_text}" 87 | 88 | # 1) Split into multiple parts if over 4k 89 | msg_parts = split_long_message(msg) 90 | 91 | try: 92 | # 2) Send each part in a separate message 93 | for part in msg_parts: 94 | await application.bot.send_message( 95 | chat_id=chat_id, 96 | text=part, 97 | parse_mode=ParseMode.HTML 98 | ) 99 | 100 | # 3) Mark the reminder as sent 101 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'sent') 102 | logger.info(f"Sent reminder {reminder_id} to chat {chat_id} for user {user_id}.") 103 | 104 | # --- Specific Error Handling --- 105 | except Forbidden: 106 | logger.warning(f"Failed sending reminder {reminder_id} to chat {chat_id}. Bot forbidden (blocked?).") 107 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'failed_forbidden') 108 | except BadRequest as e: 109 | logger.error(f"Failed sending reminder {reminder_id} to chat {chat_id}. Bad request (chat not found?): {e}") 110 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'failed_bad_request') 111 | except Exception as e: 112 | logger.error(f"Unexpected error sending reminder {reminder_id} to chat {chat_id}: {e}") 113 | # Decide: update status to 'failed_unknown' or leave 'pending' to retry? 114 | # Let's mark as failed for now to avoid potential spamming if the error persists. 
115 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'failed_unknown') 116 | else: 117 | logger.debug("No reminders due.") 118 | 119 | except Exception as e: 120 | logger.error(f"Error in reminder polling loop: {e}") 121 | # Avoid crashing the poller, wait before next cycle 122 | await asyncio.sleep(POLLING_INTERVAL) # Still wait even if there was an error fetching 123 | 124 | # Wait for the next polling interval 125 | await asyncio.sleep(POLLING_INTERVAL) -------------------------------------------------------------------------------- /src/api_get_openrouteservice.py: -------------------------------------------------------------------------------- 1 | # api_get_openrouteservice.py 2 | 3 | import os 4 | import httpx 5 | import logging 6 | import json 7 | import openai 8 | 9 | # Function to retrieve the OpenRouteService API key 10 | def get_openrouteservice_api_key(): 11 | api_key = os.getenv('OPENROUTESERVICE_API_KEY') 12 | if not api_key: 13 | logging.error("OpenRouteService API key not set.") 14 | return None 15 | return api_key 16 | 17 | # Async function to get geographic coordinates from an address 18 | async def geocode_address(address, api_key): 19 | base_url = 'https://api.openrouteservice.org/geocode/search' 20 | params = { 21 | 'api_key': api_key, 22 | 'text': address 23 | } 24 | async with httpx.AsyncClient() as client: 25 | response = await client.get(base_url, params=params) 26 | if response.status_code == 200: 27 | data = response.json() 28 | # Assumes the first feature is the most relevant match 29 | geometry = data['features'][0]['geometry'] 30 | return geometry['coordinates'] 31 | else: 32 | logging.error(f"Geocoding error: {response.text}") 33 | return None 34 | 35 | # async function to get directions 36 | async def get_route(start_coords, end_coords, profile="driving-car", format="json"): 37 | api_key = get_openrouteservice_api_key() 38 | if not api_key: 39 | return "OpenRouteService API key not set." 40 | 41 | base_url = f'https://api.openrouteservice.org/v2/directions/{profile}/{format}' 42 | headers = { 43 | 'Authorization': api_key, 44 | 'Content-Type': 'application/json', 45 | } 46 | body = { 47 | 'coordinates': [start_coords, end_coords], # Correct format for coordinates 48 | } 49 | 50 | async with httpx.AsyncClient() as client: 51 | response = await client.post(base_url, headers=headers, json=body) 52 | 53 | if response.status_code == 200: 54 | route_data = response.json() 55 | logging.info(f"API Response: {response.json()}") 56 | directions = format_route(route_data) 57 | return directions 58 | else: 59 | error_message = f"Failed to get directions. API error cause: {response.text}" 60 | logging.error(error_message) 61 | return error_message 62 | 63 | # Function to format the routing data into a user-friendly message 64 | def format_route(data): 65 | # Assuming 'routes' is the correct key and contains the expected data 66 | if 'routes' in data and len(data['routes']) > 0: 67 | # Assuming the first route and its first segment are what we're interested in 68 | steps = data['routes'][0]['segments'][0]['steps'] 69 | instructions = [step['instruction'] for step in steps] 70 | return ' '.join(instructions) 71 | else: 72 | logging.error("Missing 'routes', 'segments', or 'steps' in API response.") 73 | return "Error: API response is missing required information." 
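# For reference, format_route() above expects the ORS v2 directions payload to
# look roughly like this (a trimmed, hypothetical sample -- only the keys the
# code actually reads are shown):
#
#   {
#     "routes": [
#       {"segments": [{"steps": [{"instruction": "Head north on Main St"}]}]}
#     ]
#   }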
74 | 75 | # Function that wraps the geocoding of two addresses and getting the route between them 76 | async def get_directions_from_addresses(start_address, end_address, profile="driving-car"): 77 | api_key = get_openrouteservice_api_key() 78 | if not api_key: 79 | return "OpenRouteService API key not set." 80 | 81 | start_coords = await geocode_address(start_address, api_key) 82 | end_coords = await geocode_address(end_address, api_key) 83 | 84 | if start_coords and end_coords: 85 | return await get_route(start_coords, end_coords, profile) 86 | else: 87 | return "Could not geocode one or both of the addresses. Please ask the user to clarify." 88 | 89 | # Format the directions information and translate it if necessary. 90 | async def format_and_translate_directions(bot, user_request, directions_info): 91 | # System message to instruct the model 92 | format_translate_system_message = { 93 | "role": "system", 94 | "content": "Format the incoming data into a human readable format. Translate if needed (depending on user's language) and format the data into a digestible Telegram message with emoji symbols and HTML parse mode tags. Use i.e. Directions etc. Respond in user's original language!" 95 | } 96 | 97 | # Prepare chat history with the user's request, system message, and directions info 98 | chat_history = [ 99 | {"role": "user", "content": user_request}, 100 | format_translate_system_message, 101 | {"role": "assistant", "content": directions_info} 102 | ] 103 | 104 | # Prepare the payload for the OpenAI API 105 | payload = { 106 | "model": bot.model, 107 | "messages": chat_history, 108 | "temperature": 0.5 109 | } 110 | 111 | headers = { 112 | "Content-Type": "application/json", 113 | "Authorization": f"Bearer {openai.api_key}" 114 | } 115 | 116 | # Make the API request 117 | async with httpx.AsyncClient() as client: 118 | response = await client.post("https://api.openai.com/v1/chat/completions", 119 | data=json.dumps(payload), 120 | headers=headers, 121 | timeout=bot.timeout) 122 | response_json = response.json() 123 | 124 | # Extract the formatted and potentially translated response 125 | if response.status_code == 200 and 'choices' in response_json: 126 | translated_reply = response_json['choices'][0]['message']['content'].strip() 127 | bot_token_count = bot.count_tokens(translated_reply) # Count the tokens in the translated reply 128 | bot.total_token_usage += bot_token_count # Add to the total token usage 129 | bot.write_total_token_usage(bot.total_token_usage) # Update the total token usage file 130 | logging.info(f"Sent this directions report to user: {translated_reply}") 131 | return translated_reply 132 | else: 133 | logging.error("Error in formatting and translating directions data.") 134 | return directions_info # Return the original directions info in case of error 135 | -------------------------------------------------------------------------------- /src/config_paths.py: -------------------------------------------------------------------------------- 1 | # config_paths.py 2 | 3 | import os 4 | from pathlib import Path 5 | import configparser 6 | import logging 7 | 8 | # Initialize the logger for this module 9 | logger = logging.getLogger('TelegramBotLogger') # Ensure that 'TelegramBotLogger' is initialized in main.py 10 | 11 | # Define the base directory (the parent of the 'src' directory) 12 | BASE_DIR = Path(__file__).resolve().parents[1] 13 | 14 | # Path to the configuration file 15 | CONFIG_PATH = BASE_DIR / 'config' / 'config.ini' 16 | 17 | # Initialize the ConfigParser 
18 | config = configparser.ConfigParser() 19 | 20 | # Initialize variables with default values 21 | logs_directory = 'logs' 22 | LOG_FILE_PATH = BASE_DIR / logs_directory / 'bot.log' 23 | CHAT_LOG_FILE_PATH = BASE_DIR / logs_directory / 'chat.log' 24 | TOKEN_USAGE_FILE_PATH = BASE_DIR / logs_directory / 'token_usage.json' 25 | CHAT_LOG_MAX_SIZE = 10 * 1024 * 1024 # 10 MB 26 | ELASTICSEARCH_ENABLED = False 27 | ELASTICSEARCH_HOST = 'localhost' 28 | ELASTICSEARCH_PORT = 9200 29 | ELASTICSEARCH_USERNAME = '' 30 | ELASTICSEARCH_PASSWORD = '' 31 | 32 | # Default NWS settings 33 | NWS_USER_AGENT = 'ChatKekeWeather/1.0 (flyingfathead@protonmail.com)' 34 | NWS_RETRIES = 0 35 | NWS_RETRY_DELAY = 2 36 | 37 | # read the reminders db 38 | data_directory_name = 'data' # Default name for data directory 39 | REMINDERS_DB_FILENAME = 'reminders.db' # Default name for the reminders DB file 40 | 41 | # Attempt to read the configuration file 42 | if CONFIG_PATH.exists(): 43 | try: 44 | config.read(CONFIG_PATH) 45 | logger.info(f"Configuration file found and loaded from {CONFIG_PATH}.") 46 | 47 | # Read logs directory 48 | logs_directory = config['DEFAULT'].get('LogsDirectory', 'logs') 49 | 50 | # Define the logs directory path 51 | LOGS_DIR = BASE_DIR / logs_directory 52 | 53 | # Ensure the logs directory exists 54 | LOGS_DIR.mkdir(parents=True, exist_ok=True) 55 | 56 | # Read data directory name from config 57 | data_directory_name = config['DEFAULT'].get('DataDirectory', 'data') 58 | 59 | # Update log file paths 60 | LOG_FILE_PATH = LOGS_DIR / config['DEFAULT'].get('LogFile', 'bot.log') 61 | CHAT_LOG_FILE_PATH = LOGS_DIR / config['DEFAULT'].get('ChatLogFile', 'chat.log') 62 | TOKEN_USAGE_FILE_PATH = LOGS_DIR / 'token_usage.json' 63 | 64 | # Read ChatLogMaxSizeMB and convert to bytes 65 | ChatLogMaxSizeMB = config['DEFAULT'].getint('ChatLogMaxSizeMB', fallback=10) 66 | CHAT_LOG_MAX_SIZE = ChatLogMaxSizeMB * 1024 * 1024 67 | 68 | # Read Elasticsearch configurations 69 | if 'Elasticsearch' in config: 70 | ELASTICSEARCH_ENABLED = config['Elasticsearch'].getboolean('ElasticsearchEnabled', fallback=False) 71 | ELASTICSEARCH_HOST = config['Elasticsearch'].get('Host', fallback='localhost') 72 | ELASTICSEARCH_PORT = config['Elasticsearch'].getint('Port', fallback=9200) 73 | ELASTICSEARCH_SCHEME = config.get('Elasticsearch', 'ELASTICSEARCH_SCHEME', fallback='http') 74 | ELASTICSEARCH_USERNAME = config['Elasticsearch'].get('Username', fallback='') 75 | ELASTICSEARCH_PASSWORD = config['Elasticsearch'].get('Password', fallback='') 76 | logger.info(f"Elasticsearch Enabled: {ELASTICSEARCH_ENABLED}") 77 | else: 78 | # Elasticsearch section missing 79 | ELASTICSEARCH_ENABLED = False 80 | ELASTICSEARCH_HOST = 'localhost' 81 | ELASTICSEARCH_PORT = 9200 82 | ELASTICSEARCH_SCHEME = 'http' 83 | ELASTICSEARCH_USERNAME = '' 84 | ELASTICSEARCH_PASSWORD = '' 85 | logger.warning("Elasticsearch section missing in config.ini. 
Using default Elasticsearch settings.")
86 | 
87 |         # NWS Configuration
88 |         if 'NWS' in config:
89 |             NWS_USER_AGENT = config['NWS'].get('NWSUserAgent', fallback='ChatKekeWeather/1.0 (flyingfathead@protonmail.com)')
90 |             NWS_RETRIES = config['NWS'].getint('NWSRetries', fallback=0)
91 |             NWS_RETRY_DELAY = config['NWS'].getint('NWSRetryDelay', fallback=2)
92 |             FETCH_NWS_FORECAST = config['NWS'].getboolean('FetchNWSForecast', fallback=True)
93 |             FETCH_NWS_ALERTS = config['NWS'].getboolean('FetchNWSAlerts', fallback=True)
94 |             NWS_ONLY_ELIGIBLE_COUNTRIES = config['NWS'].getboolean('NwsOnlyEligibleCountries', fallback=True)
95 |             NWS_ELIGIBLE_COUNTRIES = config['NWS'].get('NwsEligibleCountries', fallback='US, PR, GU, AS, VI, MP').split(', ')
96 |             logger.info(f"NWS Config: User-Agent={NWS_USER_AGENT}, Retries={NWS_RETRIES}, Retry Delay={NWS_RETRY_DELAY}, Fetch Forecast={FETCH_NWS_FORECAST}, Fetch Alerts={FETCH_NWS_ALERTS}")
97 |         else:
98 |             logger.warning("NWS section not found in config.ini. Using default NWS settings.")
99 | 
100 |     except Exception as e:
101 |         # Handle exceptions during config parsing
102 |         logger.error(f"Error reading configuration file: {e}")
103 | else:
104 |     # config.ini not found
105 |     logger.warning(f"Configuration file NOT found at {CONFIG_PATH}. Using default settings. This is NOT a good idea!")
106 |     # Ensure the logs directory exists
107 |     LOGS_DIR = BASE_DIR / logs_directory
108 |     LOGS_DIR.mkdir(parents=True, exist_ok=True)
109 |     # Define log file paths
110 |     LOG_FILE_PATH = LOGS_DIR / 'bot.log'
111 |     CHAT_LOG_FILE_PATH = LOGS_DIR / 'chat.log'
112 |     TOKEN_USAGE_FILE_PATH = LOGS_DIR / 'token_usage.json'
113 |     # CHAT_LOG_MAX_SIZE already set to 10 MB
114 |     # Elasticsearch settings already set to defaults
115 | 
116 | # Define the Data Directory path
117 | DATA_DIR = BASE_DIR / data_directory_name
118 | # Ensure the data directory exists
119 | try:
120 |     DATA_DIR.mkdir(parents=True, exist_ok=True)
121 | except OSError as e:
122 |     logger.error(f"Could not create data directory {DATA_DIR}: {e}")
123 | 
124 | # Path for the reminders database
125 | REMINDERS_DB_PATH = DATA_DIR / REMINDERS_DB_FILENAME
126 | logger.info(f"Reminders database path set to: {REMINDERS_DB_PATH}")
127 | 
128 | # Define paths for token files
129 | TOKEN_FILE_PATH = BASE_DIR / 'config' / 'bot_token.txt'
130 | API_TOKEN_PATH = BASE_DIR / 'config' / 'api_token.txt'
131 | 
--------------------------------------------------------------------------------
/src/api_get_stock_prices.py:
--------------------------------------------------------------------------------
1 | # api_get_stock_prices.py
2 | #
3 | # Stock price API fetching via Alpha Vantage
4 | # (You need to register at https://www.alphavantage.co for your own API key)
5 | #
6 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/
8 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 | 
10 | import httpx
11 | import os
12 | import logging
13 | import sys
14 | import asyncio
15 | from datetime import datetime
16 | 
17 | # Configure logging
18 | # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
19 | 
20 | # Utility function to get API key
21 | def get_api_key():
22 |     api_key = os.getenv('ALPHA_VANTAGE_API_KEY')
23 |     if not api_key:
24 |         logging.error("Alpha Vantage API key not set. 
You need to set the 'ALPHA_VANTAGE_API_KEY' environment variable to use Alpha Vantage API functionalities!")
25 |         return None
26 |     return api_key
27 | 
28 | # Search for stock symbol
29 | async def search_stock_symbol(keyword):
30 |     api_key = get_api_key()
31 |     if not api_key:
32 |         return "Alpha Vantage API key not set."
33 | 
34 |     logging.info(f"Searching stock symbol for keyword: {keyword}")
35 | 
36 |     base_url = 'https://www.alphavantage.co/query'
37 |     params = {
38 |         'function': 'SYMBOL_SEARCH',
39 |         'keywords': keyword,
40 |         'apikey': api_key
41 |     }
42 | 
43 |     async with httpx.AsyncClient() as client:
44 |         response = await client.get(base_url, params=params)
45 |         logging.info(f"Symbol search response status: {response.status_code}")
46 | 
47 |         if response.status_code == 200:
48 |             data = response.json()
49 |             logging.debug(f"Symbol search response data: {data}")
50 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
51 |                 return "API rate limit exceeded. Please try again later or upgrade to a premium plan."
52 | 
53 |             best_match = data.get('bestMatches', [])
54 |             if best_match:
55 |                 # Prioritize correct symbol
56 |                 for match in best_match:
57 |                     if match['1. symbol'].upper() == keyword.upper():
58 |                         logging.debug(f"Exact match found: {match}")
59 |                         return match
60 |                 logging.debug(f"Best match found: {best_match[0]}")
61 |                 return best_match[0]  # Return the first match if no exact match found
62 |             else:
63 |                 logging.info("No matches found.")
64 |                 return "No matches found."
65 |         else:
66 |             logging.error(f"Failed to search for symbol: {response.text}")
67 |             return "Failed to search for symbol. Please try again later."
68 | 
69 | # Get stock price data with fallback to search
70 | async def get_stock_price(symbol, original_symbol=None):
71 |     # Loop guard: a recursive call has circled back to the symbol we started from.
72 |     if original_symbol is not None and symbol == original_symbol:
73 |         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
74 |         return "Symbol search loop detected. Please check the stock symbol and try again."
75 |     if original_symbol is None:
76 |         original_symbol = symbol
77 | 
78 |     api_key = get_api_key()
79 |     if not api_key:
80 |         return "Alpha Vantage API key not set."
81 | 
82 |     logging.info(f"Fetching stock data for symbol: {symbol}")
83 | 
84 |     base_url = 'https://www.alphavantage.co/query'
85 |     params = {
86 |         'function': 'TIME_SERIES_INTRADAY',
87 |         'symbol': symbol,
88 |         'interval': '1min',
89 |         'apikey': api_key
90 |     }
91 | 
92 |     async with httpx.AsyncClient() as client:
93 |         response = await client.get(base_url, params=params)
94 |         logging.info(f"Stock data response status: {response.status_code}")
95 |         logging.debug(f"Stock data response content: {response.text}")
96 | 
97 |         if response.status_code == 200:
98 |             data = response.json()
99 |             logging.debug(f"Stock data response JSON: {data}")
100 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
101 |                 return "API rate limit exceeded. Please try again later or upgrade to a premium plan."
102 |             try:
103 |                 last_refreshed = data['Meta Data']['3. Last Refreshed']
104 |                 last_price = data['Time Series (1min)'][last_refreshed]['1. open']
105 |                 now_utc = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
106 |                 return f"{symbol} last price: {last_price} USD. Data fetched at: {now_utc} UTC via Alpha Vantage." 
107 |             except KeyError:
108 |                 logging.info(f"Symbol {symbol} not found, attempting to search for symbol.")
109 |                 # Fallback to search if direct symbol fetch fails
110 |                 symbol_info = await search_stock_symbol(symbol)
111 |                 logging.debug(f"Search result for symbol {symbol}: {symbol_info}")
112 |                 if isinstance(symbol_info, dict) and '1. symbol' in symbol_info:
113 |                     new_symbol = symbol_info['1. symbol']
114 |                     logging.info(f"Found new symbol: {new_symbol}")
115 |                     if new_symbol == symbol:
116 |                         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
117 |                         return "Symbol search loop detected. Please check the stock symbol and try again."
118 |                     return f"Symbol {symbol} wasn't found, searched instead. " + await get_stock_price(new_symbol, original_symbol)
119 |                 else:
120 |                     logging.error(f"Search for symbol failed or returned no matches: {symbol_info}")
121 |                     return "Error fetching data. Please check the stock symbol and try again."
122 |         else:
123 |             logging.error(f"Failed to fetch stock data: {response.text}")
124 |             return "Failed to fetch stock data. Tell the user to please try again later or try with Perplexity API as a fallback."
125 | 
126 | # Main function to handle command-line arguments
127 | async def main():
128 |     if len(sys.argv) < 2:
129 |         print("Usage: python api_get_stock_prices.py <symbol>")
130 |         return
131 | 
132 |     query = sys.argv[1]
133 |     stock_data = await get_stock_price(query)
134 |     print(stock_data)
135 | 
136 | if __name__ == "__main__":
137 |     asyncio.run(main())
138 | 
--------------------------------------------------------------------------------
/src/api_get_stock_prices_alphavantage.py:
--------------------------------------------------------------------------------
1 | # api_get_stock_prices_alphavantage.py
2 | #
3 | # Stock price API fetching via Alpha Vantage
4 | # (You need to register at https://www.alphavantage.co for your own API key)
5 | #
6 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/
8 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 | 
10 | import httpx
11 | import os
12 | import logging
13 | import sys
14 | import asyncio
15 | from datetime import datetime
16 | 
17 | # Configure logging
18 | # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
19 | 
20 | # Utility function to get API key
21 | def get_api_key():
22 |     api_key = os.getenv('ALPHA_VANTAGE_API_KEY')
23 |     if not api_key:
24 |         logging.error("Alpha Vantage API key not set. You need to set the 'ALPHA_VANTAGE_API_KEY' environment variable to use Alpha Vantage API functionalities!")
25 |         return None
26 |     return api_key
27 | 
28 | # Search for stock symbol
29 | async def search_stock_symbol(keyword):
30 |     api_key = get_api_key()
31 |     if not api_key:
32 |         return "Alpha Vantage API key not set."
33 | 
34 |     logging.info(f"Searching stock symbol for keyword: {keyword}")
35 | 
36 |     base_url = 'https://www.alphavantage.co/query'
37 |     params = {
38 |         'function': 'SYMBOL_SEARCH',
39 |         'keywords': keyword,
40 |         'apikey': api_key
41 |     }
42 | 
43 |     async with httpx.AsyncClient() as client:
44 |         response = await client.get(base_url, params=params)
45 |         logging.info(f"Symbol search response status: {response.status_code}")
46 | 
47 |         if response.status_code == 200:
48 |             data = response.json()
49 |             logging.debug(f"Symbol search response data: {data}")
50 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
51 |                 return "API rate limit exceeded. 
Please try again later or upgrade to a premium plan."
52 | 
53 |             best_match = data.get('bestMatches', [])
54 |             if best_match:
55 |                 # Prioritize correct symbol
56 |                 for match in best_match:
57 |                     if match['1. symbol'].upper() == keyword.upper():
58 |                         logging.debug(f"Exact match found: {match}")
59 |                         return match
60 |                 logging.debug(f"Best match found: {best_match[0]}")
61 |                 return best_match[0]  # Return the first match if no exact match found
62 |             else:
63 |                 logging.info("No matches found.")
64 |                 return "No matches found."
65 |         else:
66 |             logging.error(f"Failed to search for symbol: {response.text}")
67 |             return "Failed to search for symbol. Please try again later."
68 | 
69 | # Get stock price data with fallback to search
70 | async def get_stock_price(symbol, original_symbol=None):
71 |     # Loop guard: a recursive call has circled back to the symbol we started from.
72 |     if original_symbol is not None and symbol == original_symbol:
73 |         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
74 |         return "Symbol search loop detected. Please check the stock symbol and try again."
75 |     if original_symbol is None:
76 |         original_symbol = symbol
77 | 
78 |     api_key = get_api_key()
79 |     if not api_key:
80 |         return "Alpha Vantage API key not set."
81 | 
82 |     logging.info(f"Fetching stock data for symbol: {symbol}")
83 | 
84 |     base_url = 'https://www.alphavantage.co/query'
85 |     params = {
86 |         'function': 'TIME_SERIES_INTRADAY',
87 |         'symbol': symbol,
88 |         'interval': '1min',
89 |         'apikey': api_key
90 |     }
91 | 
92 |     async with httpx.AsyncClient() as client:
93 |         response = await client.get(base_url, params=params)
94 |         logging.info(f"Stock data response status: {response.status_code}")
95 |         logging.debug(f"Stock data response content: {response.text}")
96 | 
97 |         if response.status_code == 200:
98 |             data = response.json()
99 |             logging.debug(f"Stock data response JSON: {data}")
100 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
101 |                 return "API rate limit exceeded. Please try again later or upgrade to a premium plan."
102 |             try:
103 |                 last_refreshed = data['Meta Data']['3. Last Refreshed']
104 |                 last_price = data['Time Series (1min)'][last_refreshed]['1. open']
105 |                 now_utc = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
106 |                 return f"{symbol} last price: {last_price} USD. Data fetched at: {now_utc} UTC via Alpha Vantage."
107 |             except KeyError:
108 |                 logging.info(f"Symbol {symbol} not found, attempting to search for symbol.")
109 |                 # Fallback to search if direct symbol fetch fails
110 |                 symbol_info = await search_stock_symbol(symbol)
111 |                 logging.debug(f"Search result for symbol {symbol}: {symbol_info}")
112 |                 if isinstance(symbol_info, dict) and '1. symbol' in symbol_info:
113 |                     new_symbol = symbol_info['1. symbol']
114 |                     logging.info(f"Found new symbol: {new_symbol}")
115 |                     if new_symbol == symbol:
116 |                         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
117 |                         return "Symbol search loop detected. Please check the stock symbol and try again."
118 |                     return f"Symbol {symbol} wasn't found, searched instead. " + await get_stock_price(new_symbol, original_symbol)
119 |                 else:
120 |                     logging.error(f"Search for symbol failed or returned no matches: {symbol_info}")
121 |                     return "Error fetching data. Please check the stock symbol and try again."
122 |         else:
123 |             logging.error(f"Failed to fetch stock data: {response.text}")
124 |             return "Failed to fetch stock data. Tell the user to please try again later or try with Perplexity API as a fallback." 
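# For reference, the happy path in get_stock_price() above expects the intraday
# payload to look roughly like this (a trimmed, illustrative sample -- only the
# fields the code actually reads are shown):
#
#   {
#     "Meta Data": {"3. Last Refreshed": "2024-10-11 19:59:00"},
#     "Time Series (1min)": {
#       "2024-10-11 19:59:00": {"1. open": "182.5000"}
#     }
#   }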
125 | 
126 | # Main function to handle command-line arguments
127 | async def main():
128 |     if len(sys.argv) < 2:
129 |         print("Usage: python api_get_stock_prices_alphavantage.py <symbol>")
130 |         return
131 | 
132 |     query = sys.argv[1]
133 |     stock_data = await get_stock_price(query)
134 |     print(stock_data)
135 | 
136 | if __name__ == "__main__":
137 |     asyncio.run(main())
138 | 
--------------------------------------------------------------------------------
/src/url_handler.py:
--------------------------------------------------------------------------------
1 | # url_handler.py
2 | # v0.60.1
3 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/
5 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 | 
7 | import time
8 | import logging
9 | import re
10 | import asyncio
11 | import json
12 | 
13 | # Toggle this to use the full description or a snippet.
14 | USE_SNIPPET_FOR_DESCRIPTION = False
15 | 
16 | # If we're using a snippet of the description, maximum number of lines to include
17 | DESCRIPTION_MAX_LINES = 30
18 | 
19 | # Configure logging
20 | logger = logging.getLogger(__name__)
21 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
22 | 
23 | # Helper function to format duration from seconds to H:M:S
24 | def format_duration(duration):
25 |     if not duration:
26 |         return 'No duration available'
27 |     hours, remainder = divmod(duration, 3600)
28 |     minutes, seconds = divmod(remainder, 60)
29 |     if hours:
30 |         return f"{hours}h {minutes}m {seconds}s"
31 |     else:
32 |         return f"{minutes}m {seconds}s"
33 | 
34 | # i.e. for youtube videos
35 | async def fetch_youtube_details(url, max_retries=3, base_delay=5):
36 |     command = ["yt-dlp", "--user-agent",
37 |                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
38 |                "--dump-json", url]
39 | 
40 |     for attempt in range(max_retries):
41 |         process = await asyncio.create_subprocess_exec(
42 |             *command,
43 |             stdout=asyncio.subprocess.PIPE,
44 |             stderr=asyncio.subprocess.PIPE,
45 |         )
46 | 
47 |         stdout, stderr = await process.communicate()
48 | 
49 |         if stderr and process.returncode != 0:
50 |             logger.warning(f"Attempt {attempt + 1} failed: {stderr.decode()}")
51 |             if attempt < max_retries - 1:
52 |                 wait_time = base_delay * (2 ** attempt)  # Exponential backoff
53 |                 logger.info(f"Retrying after {wait_time} seconds...")
54 |                 await asyncio.sleep(wait_time)
55 |             else:
56 |                 logger.error("All retry attempts failed.")
57 |         else:
58 |             try:
59 |                 video_details = json.loads(stdout.decode())
60 |                 duration_formatted = format_duration(video_details.get('duration'))
61 | 
62 |                 if USE_SNIPPET_FOR_DESCRIPTION:
63 |                     # Get the snippet if the flag is set to True.
64 |                     description_text = get_description_snippet(video_details.get('description', 'No description available'))
65 |                 else:
66 |                     # Use the full description if the flag is set to False. 
67 |                     description_text = video_details.get('description', 'No description available')
68 | 
69 |                 filtered_details = {
70 |                     'title': video_details.get('title', 'No title available'),
71 |                     # 'duration': video_details.get('duration', 'No duration available'),
72 |                     'duration': duration_formatted,
73 |                     'channel': video_details.get('uploader', 'No channel information available'),
74 |                     'upload_date': video_details.get('upload_date', 'No upload date available'),
75 |                     'views': video_details.get('view_count', 'No views available'),
76 |                     'likes': video_details.get('like_count', 'No likes available'),
77 |                     'average_rating': video_details.get('average_rating', 'No rating available'),
78 |                     'comment_count': video_details.get('comment_count', 'No comment count available'),
79 |                     'channel_id': video_details.get('channel_id', 'No channel ID available'),
80 |                     'video_id': video_details.get('id', 'No video ID available'),
81 |                     'tags': video_details.get('tags', ['No tags available']),
82 |                     'description': description_text,
83 |                 }
84 | 
85 |                 logger.info(f"Fetched YouTube details successfully for URL: {url}")
86 |                 return filtered_details
87 |             except json.JSONDecodeError as e:
88 |                 logger.error(f"Error decoding JSON from yt-dlp output: {e}")
89 |                 return None
90 |     return None
91 | 
92 | # Helper function to get up to n lines from the description
93 | def get_description_snippet(description, max_lines=DESCRIPTION_MAX_LINES):
94 |     lines = description.split('\n')
95 |     snippet = '\n'.join(lines[:max_lines])
96 |     return snippet
97 | 
98 | # Regular expression for extracting the YouTube video ID
99 | YOUTUBE_REGEX = (
100 |     r'(https?://)?(www\.)?'
101 |     r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
102 |     r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')
103 | 
104 | def extract_youtube_video_id(url):
105 |     match = re.match(YOUTUBE_REGEX, url)
106 |     if not match:
107 |         raise ValueError("Invalid YouTube URL")
108 |     return match.group(6)
109 | 
110 | # for parsing types of urls
111 | async def process_url_message(message_text):
112 |     urls = re.findall(r'(https?://\S+)', message_text)
113 |     context_messages = []
114 | 
115 |     for url in urls:
116 |         if not re.match(YOUTUBE_REGEX, url):
117 |             logger.info(f"Skipping non-YouTube URL: {url}")
118 |             continue
119 | 
120 |         try:
121 |             # At this point, we're sure it's a YouTube URL, so we process it.
122 |             video_id = extract_youtube_video_id(url)
123 |             youtube_url = f"https://www.youtube.com/watch?v={video_id}"
124 |             logger.info(f"Processing YouTube URL: {youtube_url}")
125 |             details = await fetch_youtube_details(youtube_url)
126 |             if details:
127 |                 description_snippet = get_description_snippet(details['description'], DESCRIPTION_MAX_LINES)
128 |                 context_message = (
129 |                     f"[INFO] Details for the URL: {youtube_url}\n"
130 |                     f"Title: {details['title']}\n"
131 |                     f"Duration: {details['duration']}\n"
132 |                     f"Channel: {details['channel']}\n"
133 |                     f"Upload date: {details['upload_date']}\n"
134 |                     f"Views: {details['views']}\n"
135 |                     f"Likes: {details['likes']}\n"
136 |                     f"Rating: {details['average_rating']}\n"
137 |                     f"Comments: {details['comment_count']}\n"
138 |                     f"Tags: {', '.join(details['tags'])}\n"
139 |                     f"Description: {description_snippet}\n"
140 |                     # f"[ If user didn't request anything special about the URL, PASS THEM I.E. THE ABOVEMENTIONED INFORMATION. 
]\n" 141 | ) 142 | context_messages.append(context_message) 143 | logger.info(f"Added context message: {context_message}") 144 | else: 145 | logger.warning(f"No details fetched for YouTube URL: {youtube_url}") 146 | except ValueError as e: 147 | logger.error(f"Invalid YouTube URL encountered: {url} - {str(e)}") 148 | except Exception as e: 149 | logger.error(f"Failed to process YouTube URL {youtube_url}: {str(e)}") 150 | 151 | return context_messages 152 | -------------------------------------------------------------------------------- /src/voice_message_handler.py: -------------------------------------------------------------------------------- 1 | # voice_message_handler.py 2 | # ~~~~~~~~~~~~~~~~~~~~~ 3 | # voice message handler 4 | # ~~~~~~~~~~~~~~~~~~~~~ 5 | import os 6 | import sys 7 | import httpx 8 | import logging 9 | import datetime 10 | import json 11 | import asyncio 12 | import openai 13 | # tg modules 14 | from telegram import Update 15 | from telegram.ext import CallbackContext 16 | from telegram.constants import ParseMode 17 | # tg-bot stuff 18 | import utils 19 | 20 | # voice message handling logic 21 | # async def handle_voice_message(bot, update: Update, context: CallbackContext, data_directory, enable_whisper, max_voice_message_length, logger) -> None: 22 | async def handle_voice_message(bot, update: Update, context: CallbackContext): 23 | 24 | # send a "holiday message" if the bot is on a break 25 | if bot.is_bot_disabled: 26 | await context.bot.send_message(chat_id=update.message.chat_id, text=bot.bot_disabled_msg) 27 | return 28 | 29 | # print("Voice message received.", flush=True) # Debug print 30 | bot.logger.info("Voice message received.") # Log the message 31 | 32 | if bot.enable_whisper: 33 | await update.message.reply_text("Voice message received. Transcribing...", parse_mode=ParseMode.HTML) 34 | 35 | # Ensure the data directory exists 36 | if not os.path.exists(bot.data_directory): 37 | os.makedirs(bot.data_directory) 38 | 39 | # Retrieve the File object of the voice message 40 | file = await context.bot.get_file(update.message.voice.file_id) 41 | 42 | # Construct the URL to download the voice message 43 | file_url = f"{file.file_path}" 44 | 45 | transcription = None # Initialize transcription 46 | 47 | # Download the file using requests 48 | try: 49 | async with httpx.AsyncClient() as client: 50 | response = await client.get(file_url) 51 | if response.status_code == 200: 52 | if not response.content: 53 | await update.message.reply_text("Received an empty voice message.") 54 | return 55 | 56 | voice_file_path = os.path.join(bot.data_directory, f"{file.file_id}.ogg") 57 | with open(voice_file_path, 'wb') as f: 58 | f.write(response.content) 59 | 60 | # Add a message to indicate successful download 61 | bot.logger.info(f"Voice message file downloaded successfully as: {voice_file_path}") 62 | 63 | # Check the duration of the voice message 64 | voice_duration = await utils.get_voice_message_duration(voice_file_path) 65 | 66 | # Compare against the max allowed duration 67 | if voice_duration > bot.max_voice_message_length: 68 | await update.message.reply_text("Your voice message is too long. 
Please keep it under {} minutes.".format(bot.max_voice_message_length))
69 |                     bot.logger.info(f"Voice file rejected for being too long: {voice_file_path}")
70 |                     return
71 | 
72 |                 # Process the voice message with WhisperAPI
73 |                 transcription = await process_voice_message(voice_file_path, bot.enable_whisper, bot.logger)
74 | 
75 |                 # Add a flushing statement to check the transcription
76 |                 bot.logger.info(f"Transcription: {transcription}")
77 | 
78 |             else:
79 |                 await update.message.reply_text("Failed to download voice message.")
80 |                 return
81 | 
82 |     except httpx.ReadTimeout:
83 |         bot.logger.error("Timeout occurred while downloading voice message.")
84 |         await update.message.reply_text("Failed to download the voice message due to a timeout. Please try again.")
85 |         return
86 |     except Exception as e:
87 |         bot.logger.error(f"Error while processing voice message: {e}")
88 |         await update.message.reply_text("An error occurred while processing your voice message.")
89 |         return
90 | 
91 |     if transcription:
92 | 
93 |         # Remove HTML bold tags for processing
94 |         transcription_for_model = transcription.replace("<b>", "[Whisper STT transcribed message from the user] ").replace("</b>", " [end]")
95 | 
96 |         # Store the cleaned transcription in `context.user_data` for further processing
97 |         context.user_data['transcribed_text'] = transcription_for_model
98 | 
99 |         # Log the transcription
100 |         bot.log_message('Transcription', update.message.from_user.id, transcription_for_model)
101 | 
102 |         # Send the transcription back to the user as is (with HTML tags for formatting)
103 |         await update.message.reply_text(transcription, parse_mode=ParseMode.HTML)
104 | 
105 |         # Now pass the cleaned transcription to the handle_message method
106 |         # which will then use it as part of the conversation with the model
107 |         await bot.handle_message(update, context)
108 | 
109 |     else:
110 |         # await update.message.reply_text("Voice message transcription failed.")
111 |         # If transcription fails or is unavailable
112 |         await context.bot.send_message(chat_id=update.effective_chat.id, text="Voice message transcription failed.")
113 | 
114 | else:
115 |     # If Whisper API is disabled, send a different response or handle accordingly
116 |     await update.message.reply_text("Voice message transcription is currently disabled.")
117 | 
118 | # the logic to interact with WhisperAPI here
119 | async def process_voice_message(file_path: str, enable_whisper, logger):
120 |     if enable_whisper:
121 |         try:
122 |             # Whisper API ...
123 |             with open(file_path, "rb") as audio_file:
124 | 
125 |                 # print out some debugging
126 |                 logger.info(f"Audio file being sent to OpenAI: {audio_file}")
127 | 
128 |                 transcript_response = await openai.AsyncOpenAI().audio.transcriptions.create(
129 |                     file=audio_file,
130 |                     model="whisper-1",
131 |                     response_format="json"
132 |                 )
133 |                 # Accessing the transcription text directly
134 |                 # return transcript_response['text'] if 'text' in transcript_response else 'No transcription available.'
135 |                 # Accessing the transcription text directly
136 | 
137 |                 logger.info(f"Transcription Response: {transcript_response}")
138 | 
139 |                 transcription_text = transcript_response.text.strip() if hasattr(transcript_response, 'text') else None
140 | 
141 |                 if transcription_text:
142 |                     # Add the emojis as Unicode characters to the transcription, wrapped in HTML bold tags
143 |                     transcription_with_emoji = "<b>🎤📝\n" + transcription_text + "</b>"
144 | 
145 |                     return transcription_with_emoji
146 |                 else:
147 |                     return 'No transcription available.' 
148 | 149 | except FileNotFoundError as e: 150 | logger.error(f"File not found: {e}") 151 | except Exception as e: 152 | logger.error(f"Unexpected error: {e}") 153 | return 'An unexpected error occurred during transcription.' 154 | 155 | else: 156 | logger.info("Whisper transcription is disabled.") 157 | return None 158 | -------------------------------------------------------------------------------- /src/elasticsearch_handler.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_handler.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | relevance_threshold = 19.5 7 | 8 | import warnings 9 | import logging 10 | 11 | # Suppress Elasticsearch warnings 12 | from elasticsearch import ElasticsearchWarning 13 | warnings.filterwarnings("ignore", category=ElasticsearchWarning) 14 | 15 | # Initialize the logger for this module 16 | logger = logging.getLogger('TelegramBotLogger') # Ensure this logger is configured in main.py 17 | 18 | # Function to get Elasticsearch client 19 | def get_elasticsearch_client(config): 20 | try: 21 | from elasticsearch import Elasticsearch 22 | except ImportError: 23 | logger.error("❌ 'elasticsearch' module not found. Please install it using 'pip install elasticsearch'.") 24 | return None 25 | 26 | try: 27 | es_host = config.get('Elasticsearch', 'ELASTICSEARCH_HOST', fallback='localhost').strip("'\"") 28 | es_port = config.getint('Elasticsearch', 'ELASTICSEARCH_PORT', fallback=9200) 29 | es_scheme = config.get('Elasticsearch', 'ELASTICSEARCH_SCHEME', fallback='http').strip("'\"") # Add scheme 30 | es_username = config.get('Elasticsearch', 'ELASTICSEARCH_USERNAME', fallback=None) 31 | es_password = config.get('Elasticsearch', 'ELASTICSEARCH_PASSWORD', fallback=None) 32 | 33 | # Log the configuration being used 34 | logger.info(f"Elasticsearch Configurations: Host={es_host}, Port={es_port}, Scheme={es_scheme}, Username={'***' if es_username else 'None'}") 35 | 36 | es = Elasticsearch( 37 | hosts=[{'host': es_host, 'port': es_port, 'scheme': es_scheme}], # Include 'scheme' 38 | http_auth=(es_username, es_password) if es_username and es_password else None, 39 | timeout=5 40 | ) 41 | return es 42 | except Exception as e: 43 | logger.error(f"❌ Error initializing Elasticsearch client: {e}") 44 | return None 45 | 46 | async def search_es_for_context(search_terms, config): 47 | es = get_elasticsearch_client(config) 48 | if es is None: 49 | logger.warning("⚠️ Elasticsearch client is not available. 
Skipping search.") 50 | return None 51 | 52 | if not es.ping(): 53 | logger.warning("⚠️ Elasticsearch is enabled but not reachable.") 54 | return None 55 | 56 | index = "tg-bot-rag-index" 57 | 58 | # Adjust the search_terms to use only the first line or a set number of characters 59 | search_terms_adjusted = search_terms.split('\n', 1)[0][:256] # Adjust 256 to your needs 60 | 61 | query = { 62 | "size": 1, # Focus on the top hit 63 | "query": { 64 | "multi_match": { 65 | "query": search_terms_adjusted, 66 | "fields": ["question^2", "answer"], # Boosting questions for relevance 67 | "type": "best_fields" # Can also experiment with other types like "most_fields" or "cross_fields" 68 | } 69 | }, 70 | "_source": ["question", "answer"], 71 | } 72 | 73 | try: 74 | response = es.search(index=index, body=query) 75 | except Exception as e: 76 | logger.error(f"❌ Error performing search on Elasticsearch: {e}") 77 | return None 78 | 79 | if response['hits']['hits']: 80 | hit = response['hits']['hits'][0] 81 | score = hit['_score'] # Extract the score of the hit 82 | 83 | # Log every score for monitoring and tuning purposes 84 | logger.info(f"Search term: '{search_terms}' | Score: {score} | Threshold: {relevance_threshold}") 85 | 86 | # Check if the score exceeds the relevance threshold 87 | if score > relevance_threshold: 88 | question = hit["_source"]["question"] 89 | answer = hit["_source"]["answer"] 90 | # Format for model context 91 | context_entry = f"{answer}" 92 | logger.info(f"✅ Result above relevance threshold: {relevance_threshold}. Included in context: {context_entry}") 93 | return context_entry 94 | else: 95 | logger.info(f"⚠️ Result below relevance threshold (score: {score}, threshold: {relevance_threshold}).") 96 | return None 97 | else: 98 | logger.info("ℹ️ No hits found in Elasticsearch search.") 99 | return None 100 | 101 | # ## // (old method) 102 | # # elasticsearch_handler.py 103 | # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 104 | # # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 105 | # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | # relevance_threshold = 19.5 108 | 109 | # from elasticsearch import Elasticsearch, ElasticsearchWarning 110 | # import warnings 111 | # import logging 112 | 113 | # # Suppress Elasticsearch warnings 114 | # warnings.filterwarnings("ignore", category=ElasticsearchWarning) 115 | 116 | # # Initialize the logger for this module 117 | # logger = logging.getLogger('TelegramBotLogger') # Ensure this logger is configured in main.py 118 | 119 | # async def search_es_for_context(search_terms): 120 | # es = Elasticsearch(["http://localhost:9200"]) 121 | # if not es.ping(): 122 | # logging.error("Could not connect to Elasticsearch.") 123 | # return None 124 | 125 | # index = "tg-bot-rag-index" 126 | 127 | # # Adjust the search_terms to use only the first line or a set number of characters 128 | # search_terms_adjusted = search_terms.split('\n', 1)[0][:256] # Adjust 256 to your needs 129 | 130 | # query = { 131 | # "size": 1, # Focus on the top hit 132 | # "query": { 133 | # "multi_match": { 134 | # # "query": search_terms, 135 | # "query": search_terms_adjusted, 136 | # "fields": ["question^2", "answer"], # Boosting questions for relevance 137 | # "type": "best_fields" # Can also experiment with other types like "most_fields" or "cross_fields" 138 | # } 139 | # }, 140 | # "_source": ["question", "answer"], 141 | # } 142 | 143 | # response = es.search(index=index, body=query) 144 | # if response['hits']['hits']: 145 | # hit = 
response['hits']['hits'][0] 146 | # score = hit['_score'] # Extract the score of the hit 147 | 148 | # # Log every score for monitoring and tuning purposes 149 | # # logging.info(f"Search term: '{search_terms}' | Score: {score} | Threshold: {relevance_threshold}") 150 | 151 | # # Check if the score exceeds the relevance threshold 152 | # if score > relevance_threshold: 153 | # question = hit["_source"]["question"] 154 | # answer = hit["_source"]["answer"] 155 | # # Format for model context 156 | # context_entry = f"{answer}" 157 | # logging.info(f"Result above relevance threshold: {relevance_threshold}. Included in context: {context_entry}") 158 | # return context_entry 159 | # else: 160 | # logging.info(f"Result below relevance threshold (score: {score}, threshold: {relevance_threshold}).") 161 | # return None 162 | # else: 163 | # return None 164 | 165 | # """ response = es.search(index=index, body=query) 166 | # if response['hits']['hits']: 167 | # hit = response['hits']['hits'][0] 168 | # question = hit["_source"]["question"] 169 | # answer = hit["_source"]["answer"] 170 | # # Format for model context 171 | # context_entry = f"Q: {question}\nA: {answer}" 172 | # return context_entry 173 | # else: 174 | # return None """ 175 | -------------------------------------------------------------------------------- /src/api_get_nws_weather.py: -------------------------------------------------------------------------------- 1 | # api_get_nws.py 2 | # 3 | # > get the weather using the NWS (National Weather Service, US) API 4 | # 5 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 7 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | import asyncio 10 | import httpx 11 | import logging 12 | from config_paths import NWS_USER_AGENT, NWS_RETRIES, NWS_RETRY_DELAY, FETCH_NWS_FORECAST, FETCH_NWS_ALERTS 13 | 14 | # Base URL for NWS API 15 | NWS_BASE_URL = 'https://api.weather.gov' 16 | 17 | async def get_nws_forecast(lat, lon, retries=NWS_RETRIES, delay=NWS_RETRY_DELAY): 18 | """ 19 | Fetches the forecast from the NWS API for the given latitude and longitude. 20 | 21 | Args: 22 | lat (float): Latitude in decimal degrees. 23 | lon (float): Longitude in decimal degrees. 24 | retries (int): Number of retries for transient errors. Defaults to RETRIES. 25 | delay (int): Delay between retries in seconds. 26 | 27 | Returns: 28 | dict: Combined forecast data or None if fetching fails. 
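        Example (illustrative only; the coordinates are placeholders, and the
        'periods' path assumes the standard NWS GeoJSON forecast layout):
            data = await get_nws_forecast(40.7128, -74.0060)
            if data and data.get('nws_forecast'):
                periods = data['nws_forecast']['properties'].get('periods', [])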
29 | """ 30 | 31 | if not FETCH_NWS_FORECAST: 32 | logging.info("Fetching NWS forecast is disabled in the config.") 33 | return None 34 | 35 | # Round coordinates to 4 decimal places 36 | lat = round(lat, 4) 37 | lon = round(lon, 4) 38 | points_url = f"{NWS_BASE_URL}/points/{lat},{lon}" 39 | 40 | async with httpx.AsyncClient(follow_redirects=True) as client: 41 | for attempt in range(retries + 1): # Ensure at least one attempt is made 42 | try: 43 | # Step 1: Retrieve metadata for the location 44 | response = await client.get(points_url, headers={'User-Agent': NWS_USER_AGENT}) 45 | response.raise_for_status() 46 | points_data = response.json() 47 | 48 | # Extract forecast URLs 49 | forecast_url = points_data['properties']['forecast'] 50 | forecast_hourly_url = points_data['properties'].get('forecastHourly') 51 | 52 | # Step 2: Retrieve forecast data 53 | forecast_response = await client.get(forecast_url, headers={'User-Agent': NWS_USER_AGENT}) 54 | forecast_response.raise_for_status() 55 | forecast_data = forecast_response.json() 56 | 57 | # Step 3: Retrieve hourly forecast data 58 | forecast_hourly_data = None 59 | if forecast_hourly_url: 60 | try: 61 | forecast_hourly_response = await client.get(forecast_hourly_url, headers={'User-Agent': NWS_USER_AGENT}) 62 | forecast_hourly_response.raise_for_status() 63 | forecast_hourly_data = forecast_hourly_response.json() 64 | except httpx.HTTPStatusError as e: 65 | logging.error(f"NWS Hourly Forecast HTTP error: {e.response.status_code} - {e.response.text}") 66 | 67 | return { 68 | 'nws_forecast': forecast_data, 69 | 'nws_forecast_hourly': forecast_hourly_data 70 | } 71 | 72 | except httpx.HTTPStatusError as e: 73 | if e.response.status_code >= 500 and attempt < retries: 74 | logging.warning(f"NWS API HTTP error: {e.response.status_code} - {e.response.text}. Retrying in {delay} seconds...") 75 | await asyncio.sleep(delay) 76 | else: 77 | logging.error(f"NWS API HTTP error: {e.response.status_code} - {e.response.text}") 78 | break 79 | except Exception as e: 80 | logging.error(f"Error fetching NWS forecast: {e}") 81 | break 82 | 83 | return None 84 | 85 | # get alerts via NWS (weather.gov) 86 | async def get_nws_alerts(lat, lon): 87 | """ 88 | Fetches active alerts from the NWS API for the given latitude and longitude. 89 | 90 | Args: 91 | lat (float): Latitude in decimal degrees. 92 | lon (float): Longitude in decimal degrees. 93 | 94 | Returns: 95 | list: A list of active alerts with detailed information or an empty list if none are found. 
96 | """ 97 | 98 | if not FETCH_NWS_ALERTS: 99 | logging.info("Fetching NWS alerts is disabled in the config.") 100 | return [] 101 | 102 | alerts_url = f"{NWS_BASE_URL}/alerts/active?point={lat},{lon}" 103 | 104 | async with httpx.AsyncClient() as client: 105 | try: 106 | response = await client.get(alerts_url, headers={'User-Agent': NWS_USER_AGENT}) 107 | response.raise_for_status() 108 | alerts_data = response.json() 109 | 110 | # Extracting the detailed alerts 111 | alerts = [] 112 | for feature in alerts_data.get('features', []): 113 | properties = feature.get('properties', {}) 114 | alert = { 115 | 'headline': properties.get('headline'), 116 | 'description': properties.get('description'), 117 | 'instruction': properties.get('instruction'), 118 | 'severity': properties.get('severity'), 119 | 'event': properties.get('event'), 120 | 'areaDesc': properties.get('areaDesc'), 121 | 'certainty': properties.get('certainty'), 122 | 'urgency': properties.get('urgency'), 123 | 'effective': properties.get('effective'), 124 | 'expires': properties.get('expires'), 125 | 'senderName': properties.get('senderName'), 126 | 'response': properties.get('response'), 127 | # Add more fields if needed 128 | } 129 | alerts.append(alert) 130 | return alerts 131 | 132 | except httpx.HTTPStatusError as e: 133 | logging.error(f"NWS Alerts API HTTP error: {e.response.status_code} - {e.response.text}") 134 | except Exception as e: 135 | logging.error(f"Error fetching NWS alerts: {e}") 136 | 137 | return [] 138 | 139 | # # // (old method) 140 | # # get alerts via NWS (weather.gov) 141 | # async def get_nws_alerts(lat, lon): 142 | # """ 143 | # Fetches active alerts from the NWS API for the given latitude and longitude. 144 | 145 | # Args: 146 | # lat (float): Latitude in decimal degrees. 147 | # lon (float): Longitude in decimal degrees. 148 | 149 | # Returns: 150 | # list: A list of active alerts or an empty list if none are found. 151 | # """ 152 | 153 | # if not FETCH_NWS_ALERTS: 154 | # logging.info("Fetching NWS alerts is disabled in the config.") 155 | # return [] 156 | 157 | # alerts_url = f"{NWS_BASE_URL}/alerts/active?point={lat},{lon}" 158 | 159 | # async with httpx.AsyncClient() as client: 160 | # try: 161 | # response = await client.get(alerts_url, headers={'User-Agent': NWS_USER_AGENT}) 162 | # response.raise_for_status() 163 | # alerts_data = response.json() 164 | 165 | # # Extract alerts from GeoJSON 166 | # alerts = alerts_data.get('features', []) 167 | # return alerts 168 | 169 | # except httpx.HTTPStatusError as e: 170 | # logging.error(f"NWS Alerts API HTTP error: {e.response.status_code} - {e.response.text}") 171 | # except Exception as e: 172 | # logging.error(f"Error fetching NWS alerts: {e}") 173 | 174 | # return [] 175 | -------------------------------------------------------------------------------- /src/api_get_weatherapi.py: -------------------------------------------------------------------------------- 1 | # api_get_weatherapi.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | # 6 | # >>> weather fetcher module version: v0.728 7 | # >>> (Updated July 13 2024) 8 | # 9 | # This API functionality requires WeatherAPI key. 10 | # You can get the API key from the corresponding service provider. 11 | # Once you have the API key, add it to your environment variables: 12 | # export WEATHERAPI_KEY="" 13 | # (or on i.e. 
Linux, add to your `~/.bashrc`: export WEATHERAPI_KEY="" ) 14 | 15 | import httpx 16 | import os 17 | import logging 18 | 19 | # Function to check for WeatherAPI key 20 | def get_weatherapi_key(): 21 | api_key = os.getenv('WEATHERAPI_KEY') 22 | if not api_key: 23 | logging.error("[WARNING] WeatherAPI key not set. You need to set the 'WEATHERAPI_KEY' environment variable to use WeatherAPI functionalities!") 24 | return None 25 | return api_key 26 | 27 | # Dictionary to translate moon phases from English to Finnish 28 | moon_phase_translation = { 29 | "New Moon": "uusikuu", 30 | "Waxing Crescent": "kasvava sirppi", 31 | "First Quarter": "ensimmäinen neljännes", 32 | "Waxing Gibbous": "kasvava puolikuu", 33 | "Full Moon": "täysikuu", 34 | "Waning Gibbous": "vähenevä puolikuu", 35 | "Last Quarter": "viimeinen neljännes", 36 | "Waning Crescent": "vähenevä sirppi" 37 | } 38 | 39 | # get moon phase data 40 | async def get_moon_phase(lat, lon): 41 | api_key = get_weatherapi_key() 42 | if not api_key: 43 | return None 44 | 45 | logging.info(f"Fetching moon phase data for coordinates: Latitude: {lat}, Longitude: {lon}") 46 | base_url = 'http://api.weatherapi.com/v1/astronomy.json' 47 | url = f"{base_url}?key={api_key}&q={lat},{lon}" 48 | 49 | async with httpx.AsyncClient() as client: 50 | response = await client.get(url) 51 | logging.info(f"Moon phase response status: {response.status_code}") 52 | 53 | if response.status_code == 200: 54 | data = response.json() 55 | logging.info(f"Moon phase data: {data}") 56 | moon_phase = data['astronomy']['astro']['moon_phase'] 57 | translated_moon_phase = moon_phase_translation.get(moon_phase, moon_phase) 58 | return translated_moon_phase 59 | else: 60 | logging.error(f"Failed to fetch moon phase data: {response.text}") 61 | return None 62 | 63 | # get timezone for the coordinates 64 | async def get_timezone(lat, lon): 65 | api_key = get_weatherapi_key() 66 | if not api_key: 67 | return None 68 | 69 | logging.info(f"Fetching timezone data for coordinates: Latitude: {lat}, Longitude: {lon}") 70 | base_url = 'http://api.weatherapi.com/v1/timezone.json' 71 | url = f"{base_url}?key={api_key}&q={lat},{lon}" 72 | 73 | async with httpx.AsyncClient() as client: 74 | response = await client.get(url) 75 | logging.info(f"Timezone response status: {response.status_code}") 76 | 77 | if response.status_code == 200: 78 | data = response.json() 79 | logging.info(f"Timezone data: {data}") 80 | timezone = data['location']['tz_id'] 81 | return timezone 82 | else: 83 | logging.error(f"Failed to fetch timezone data: {response.text}") 84 | return None 85 | 86 | # get daily forecast, safety alerts, and air quality index 87 | async def get_daily_forecast(location): 88 | api_key = get_weatherapi_key() 89 | if not api_key: 90 | return None 91 | 92 | logging.info(f"Fetching daily forecast data for location: {location}") 93 | base_url = 'http://api.weatherapi.com/v1/forecast.json' 94 | url = f"{base_url}?key={api_key}&q={location}&days=1&alerts=yes&aqi=yes" 95 | 96 | async with httpx.AsyncClient() as client: 97 | response = await client.get(url) 98 | logging.info(f"Daily forecast response status: {response.status_code}") 99 | 100 | if response.status_code == 200: 101 | data = response.json() 102 | logging.info(f"Daily forecast data: {data}") 103 | 104 | if 'forecast' in data and 'forecastday' in data['forecast'] and len(data['forecast']['forecastday']) > 0: 105 | forecast = data['forecast']['forecastday'][0] 106 | current = data['current'] 107 | alerts = data.get('alerts', {}) 108 | 
air_quality = current['air_quality'] 109 | 110 | return { 111 | 'date': forecast['date'], 112 | 'temperature': forecast['day']['avgtemp_c'], 113 | 'condition': forecast['day']['condition']['text'], 114 | 'wind': forecast['day']['maxwind_kph'], 115 | 'precipitation': forecast['day']['totalprecip_mm'], 116 | 'uv_index': forecast['day']['uv'], 117 | 'air_quality': air_quality, 118 | 'alerts': alerts 119 | } 120 | else: 121 | logging.error("No forecast data available.") 122 | return { 123 | 'date': 'N/A', 124 | 'temperature': 'N/A', 125 | 'condition': 'N/A', 126 | 'wind': 'N/A', 127 | 'precipitation': 'N/A', 128 | 'uv_index': 'N/A', 129 | 'air_quality': {}, 130 | 'alerts': {} 131 | } 132 | else: 133 | logging.error(f"Failed to fetch daily forecast data: {response.text}") 134 | return None 135 | 136 | # get current weather including UV index 137 | async def get_current_weather_via_weatherapi(location): 138 | api_key = get_weatherapi_key() 139 | if not api_key: 140 | return None 141 | 142 | logging.info(f"Fetching current weather data for location: {location}") 143 | base_url = 'http://api.weatherapi.com/v1/current.json' 144 | url = f"{base_url}?key={api_key}&q={location}" 145 | 146 | async with httpx.AsyncClient() as client: 147 | response = await client.get(url) 148 | logging.info(f"Current weather response status: {response.status_code}") 149 | 150 | if response.status_code == 200: 151 | data = response.json() 152 | logging.info(f"Current weather data: {data}") 153 | 154 | if 'current' in data: 155 | current = data['current'] 156 | return { 157 | 'temperature': current.get('temp_c', 'N/A'), 158 | 'condition': current.get('condition', {}).get('text', 'N/A'), 159 | 'wind': current.get('wind_kph', 'N/A'), 160 | 'precipitation': current.get('precip_mm', 'N/A'), 161 | 'uv_index': current.get('uv', 'N/A'), 162 | 'visibility': current.get('vis_km', 'N/A'), # Added visibility data 163 | 'air_quality': current.get('air_quality', {}) 164 | } 165 | else: 166 | logging.error(f"'current' field missing in the response data: {data}") 167 | return None 168 | else: 169 | logging.error(f"Failed to fetch current weather data: {response.text}") 170 | return None 171 | 172 | # get astronomy data including moonrise, moonset, and moon illumination 173 | async def get_astronomy_data(lat, lon): 174 | api_key = get_weatherapi_key() 175 | if not api_key: 176 | return None 177 | 178 | logging.info(f"Fetching astronomy data for coordinates: Latitude: {lat}, Longitude: {lon}") 179 | base_url = 'http://api.weatherapi.com/v1/astronomy.json' 180 | url = f"{base_url}?key={api_key}&q={lat},{lon}" 181 | 182 | async with httpx.AsyncClient() as client: 183 | response = await client.get(url) 184 | logging.info(f"Astronomy response status: {response.status_code}") 185 | 186 | if response.status_code == 200: 187 | data = response.json() 188 | logging.info(f"Astronomy data: {data}") 189 | astro = data['astronomy']['astro'] 190 | moonrise = astro['moonrise'] 191 | moonset = astro['moonset'] 192 | moon_illumination = astro['moon_illumination'] 193 | return { 194 | 'moonrise': moonrise, 195 | 'moonset': moonset, 196 | 'moon_illumination': moon_illumination 197 | } 198 | else: 199 | logging.error(f"Failed to fetch astronomy data: {response.text}") 200 | return None 201 | 202 | # Additional WeatherAPI-related functions can be added here 203 | -------------------------------------------------------------------------------- /src/bot_token.py: -------------------------------------------------------------------------------- 1 | # ~~~ Enhanced 
Read Telegram Bot Token with Configurable Fallback, Appropriate Logging, and Validity Check, Docker Detection ~~~ 2 | 3 | import os 4 | import configparser 5 | import logging 6 | from pathlib import Path 7 | import sys 8 | from config_paths import CONFIG_PATH, TOKEN_FILE_PATH 9 | 10 | # Set up basic logging configuration 11 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)]) 12 | 13 | class BotTokenError(Exception): 14 | """Custom exception for bot token retrieval failures.""" 15 | pass 16 | 17 | def is_running_in_docker(): 18 | # Check for .dockerenv file 19 | if Path("/.dockerenv").exists(): 20 | logging.info("Docker environment detected based on .dockerenv file.") 21 | return True 22 | # Check for control groups 23 | try: 24 | with open("/proc/self/cgroup", "rt") as f: 25 | if any("docker" in line for line in f): 26 | logging.info("Docker environment detected based on control groups.") 27 | return True 28 | except Exception: 29 | pass 30 | # Check for Docker-specific environment variable 31 | if os.getenv("container", None) == "docker": 32 | logging.info("Docker environment detected based on environment variable.") 33 | return True 34 | logging.info("No Docker environment detected.") 35 | return False 36 | 37 | def get_bot_token(): 38 | try: 39 | logging.debug(f"Config path: {CONFIG_PATH}") 40 | logging.debug(f"Token file path: {TOKEN_FILE_PATH}") 41 | 42 | # Verify config.ini exists 43 | if not CONFIG_PATH.is_file(): 44 | raise BotTokenError(f"config.ini not found at {CONFIG_PATH}.") 45 | 46 | # Read configuration 47 | config = configparser.ConfigParser() 48 | config.read(CONFIG_PATH) 49 | 50 | # Validate configuration 51 | if 'DEFAULT' not in config: 52 | raise BotTokenError("Missing 'DEFAULT' section in config.ini.") 53 | 54 | prefer_env = config.getboolean('DEFAULT', 'PreferEnvForBotToken', fallback=True) 55 | # Updated code in bot_token.py 56 | allow_fallback = config.getboolean('DEFAULT', 'AllowBotTokenFallback', fallback=True) 57 | ask_for_token = config.getboolean('DEFAULT', 'AskForTokenIfNotFound', fallback=True) 58 | 59 | # Disable asking for token if running inside Docker 60 | if is_running_in_docker() or os.getenv("RUNNING_IN_DOCKER") == "true": 61 | logging.info("Running inside Docker. Disabling token prompt.") 62 | ask_for_token = False 63 | 64 | invalid_tokens = [ 65 | 'YourTelegramBotToken', 66 | '123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11', # Example bot token from Telegram documentation 67 | '', 68 | None 69 | ] 70 | 71 | def is_valid_token(token): 72 | return token not in invalid_tokens and len(token.split(':')) == 2 73 | 74 | # Define retrieval methods 75 | def retrieve_from_env(): 76 | bot_token = os.getenv('TELEGRAM_BOT_TOKEN') 77 | if bot_token and is_valid_token(bot_token): 78 | logging.info("Bot token successfully retrieved from environment variable.") 79 | return bot_token 80 | else: 81 | logging.warning("Invalid or unset TELEGRAM_BOT_TOKEN environment variable.") 82 | return None 83 | 84 | def retrieve_from_file(): 85 | if TOKEN_FILE_PATH.is_file(): 86 | try: 87 | bot_token = TOKEN_FILE_PATH.read_text().strip() 88 | if bot_token and is_valid_token(bot_token): 89 | logging.info("Bot token successfully retrieved from bot_token.txt.") 90 | return bot_token 91 | else: 92 | logging.error("Invalid or empty bot_token.txt.") 93 | return None 94 | except IOError as e: 95 | logging.error(f"Failed to read bot_token.txt. 
Details: {e}") 96 | return None 97 | else: 98 | logging.error(f"bot_token.txt not found at {TOKEN_FILE_PATH}.") 99 | return None 100 | 101 | def query_user_for_token(): 102 | logging.info("No valid bot token found. Please obtain a Telegram bot token from @BotFather on Telegram (https://t.me/BotFather) and paste it below.") 103 | logging.info("Press Enter without typing anything to quit.") 104 | token = input("Your Telegram bot token: ").strip() 105 | if token and is_valid_token(token): 106 | # Save the token to bot_token.txt for future use 107 | try: 108 | TOKEN_FILE_PATH.write_text(token) 109 | logging.info(f"Bot token saved to {TOKEN_FILE_PATH}.") 110 | return token 111 | except IOError as e: 112 | logging.error(f"Failed to save bot token to bot_token.txt. Details: {e}") 113 | return None 114 | else: 115 | logging.error("No valid token entered. Exiting application.") 116 | logging.info("No valid bot token found. Please obtain a Telegram bot token from @BotFather on Telegram (https://t.me/BotFather) and either set it as an environment variable (`TELEGRAM_BOT_TOKEN`) or place it under `config/bot_token.txt`.") 117 | sys.exit(1) 118 | 119 | # Retrieval logic based on configuration 120 | if prefer_env: 121 | token = retrieve_from_env() 122 | if token: 123 | return token 124 | elif allow_fallback: 125 | logging.warning("Preferred environment variable not found or invalid. Attempting to retrieve bot token from bot_token.txt as fallback.") 126 | token = retrieve_from_file() 127 | if token: 128 | return token 129 | elif ask_for_token: 130 | token = query_user_for_token() 131 | if token: 132 | return token 133 | else: 134 | raise BotTokenError("Failed to retrieve bot token from environment variable, token file, and user input.") 135 | else: 136 | raise BotTokenError("Failed to retrieve bot token from both environment variable and token file.") 137 | else: 138 | logging.error("Environment variable not found and fallback is disabled.") 139 | raise BotTokenError( 140 | "Failed to retrieve bot token. " 141 | "Please ensure the TELEGRAM_BOT_TOKEN environment variable is set, or allow fallback by enabling it in config.ini." 142 | ) 143 | else: 144 | token = retrieve_from_file() 145 | if token: 146 | return token 147 | elif allow_fallback: 148 | logging.warning("bot_token.txt not found or invalid. Attempting to retrieve bot token from environment variable as fallback.") 149 | token = retrieve_from_env() 150 | if token: 151 | return token 152 | elif ask_for_token: 153 | token = query_user_for_token() 154 | if token: 155 | return token 156 | else: 157 | raise BotTokenError("Failed to retrieve bot token from token file, environment variable, and user input.") 158 | else: 159 | raise BotTokenError("Failed to retrieve bot token from both token file and environment variable.") 160 | else: 161 | logging.error("Token file not found and fallback is disabled.") 162 | raise BotTokenError( 163 | "Failed to retrieve bot token. " 164 | "Please ensure bot_token.txt exists at the expected location, or allow fallback by enabling it in config.ini." 
165 | ) 166 | 167 | except BotTokenError as e: 168 | logging.error(f"BotTokenError: {e}") 169 | sys.stderr.flush() # Ensure all stderr logs are flushed 170 | sys.exit(1) # Explicitly exit on BotTokenError 171 | except Exception as e: 172 | logging.error(f"Unexpected error while retrieving bot token: {e}") 173 | sys.stderr.flush() # Ensure all stderr logs are flushed 174 | sys.exit(1) # Explicitly exit on unexpected errors 175 | 176 | # Example usage 177 | if __name__ == "__main__": 178 | try: 179 | token = get_bot_token() 180 | logging.info("Bot token successfully retrieved.") 181 | except Exception as e: 182 | logging.critical("Failed to retrieve bot token. Exiting application.") 183 | sys.stderr.flush() # Ensure all stderr logs are flushed 184 | sys.exit(1) -------------------------------------------------------------------------------- /src/reminder_handler.py: -------------------------------------------------------------------------------- 1 | # src/reminder_handler.py 2 | 3 | import logging 4 | import configparser 5 | from datetime import datetime, timezone 6 | from config_paths import CONFIG_PATH, REMINDERS_DB_PATH 7 | import db_utils 8 | from db_utils import get_past_reminders_for_user 9 | 10 | # Load config to get MaxAlertsPerUser 11 | config = configparser.ConfigParser() 12 | config.read(CONFIG_PATH) 13 | SHOW_PAST_REMINDERS_COUNT = config.getint('Reminders', 'ShowPastRemindersCount', fallback=0) 14 | 15 | try: 16 | MAX_ALERTS_PER_USER = config.getint('Reminders', 'MaxAlertsPerUser', fallback=30) 17 | except configparser.NoSectionError: 18 | MAX_ALERTS_PER_USER = 30 19 | 20 | # Get a logger for this module 21 | logger = logging.getLogger(__name__) 22 | # Ensure logs bubble up to the root logger (which has the timestamp format) 23 | logger.propagate = True 24 | # DO NOT setLevel or add handlers here; rely on main.py or root config for formatting 25 | 26 | async def handle_add_reminder(user_id, chat_id, reminder_text, due_time_utc_str): 27 | """ 28 | Create a new reminder for user 'user_id', to be delivered in chat 'chat_id' 29 | at time 'due_time_utc_str' (ISO8601 UTC). 30 | 'reminder_text' is the user-provided note. 31 | 32 | Returns a string describing success/failure to be inserted 33 | into the chat conversation. 34 | """ 35 | # 1) Check if DB is initialized 36 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 37 | logger.error("Attempt to add reminder but DB not initialized!") 38 | return "Error: DB not available. Reminders cannot be added." 39 | 40 | # 2) Validate/parse time format 41 | try: 42 | datetime.strptime(due_time_utc_str, '%Y-%m-%dT%H:%M:%SZ') 43 | except ValueError: 44 | logger.warning(f"User {user_id} attempted to add reminder with invalid due_time_utc: {due_time_utc_str}") 45 | return ( 46 | "The time format is invalid. " 47 | "Please specify in ISO8601 UTC, e.g. 2025-01-02T13:00:00Z " 48 | "or convert user-friendly times to UTC first." 49 | ) 50 | 51 | # 3) Check user's current reminder count 52 | current_count = db_utils.count_pending_reminders_for_user(REMINDERS_DB_PATH, user_id) 53 | 54 | # Only enforce the limit if it's > 0 55 | if MAX_ALERTS_PER_USER > 0 and current_count >= MAX_ALERTS_PER_USER: 56 | logger.info(f"User {user_id} has {current_count} reminders; reached max of {MAX_ALERTS_PER_USER}.") 57 | return f"You already have {current_count} pending reminders. The maximum is {MAX_ALERTS_PER_USER}." 
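    # (Setting MaxAlertsPerUser = 0 in config.ini's [Reminders] section disables the cap check above entirely.)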
58 | 59 | # 4) Add to DB 60 | reminder_id = db_utils.add_reminder_to_db( 61 | REMINDERS_DB_PATH, user_id, chat_id, reminder_text, due_time_utc_str 62 | ) 63 | if reminder_id: 64 | logger.info( 65 | f"User {user_id} created reminder #{reminder_id}: " 66 | f"'{reminder_text}' at {due_time_utc_str}" 67 | ) 68 | return ( 69 | f"Your reminder (#{reminder_id}) has been set for {due_time_utc_str} (UTC). " 70 | f"Message: '{reminder_text}'" 71 | ) 72 | else: 73 | logger.error(f"Failed to add reminder to DB for user {user_id}. Possibly DB error.") 74 | return "Failed to add your reminder due to a database error. Sorry!" 75 | 76 | 77 | async def handle_view_reminders(user_id): 78 | """ 79 | Return a string summarizing all of the user's pending reminders 80 | (status='pending'). If none exist, say so. 81 | """ 82 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 83 | logger.error("Attempt to view reminders but DB not available!") 84 | return "Error: DB not available. Cannot view reminders." 85 | 86 | # 1) Get the pending 87 | pending_reminders = db_utils.get_pending_reminders_for_user(REMINDERS_DB_PATH, user_id) 88 | if pending_reminders: 89 | lines = ["Your current (pending) reminders:"] 90 | for idx, r in enumerate(pending_reminders, start=1): 91 | rid = r['reminder_id'] 92 | text = r['reminder_text'] 93 | due_utc = r['due_time_utc'] 94 | lines.append(f"• Reminder #{idx} (ID {rid}) due {due_utc}\n “{text}”") 95 | pending_section = "\n".join(lines) 96 | else: 97 | pending_section = "You have no pending reminders." 98 | 99 | # 2) Optionally get the past ones 100 | if SHOW_PAST_REMINDERS_COUNT > 0: 101 | past = get_past_reminders_for_user(REMINDERS_DB_PATH, user_id, SHOW_PAST_REMINDERS_COUNT) 102 | if past: 103 | lines = [f"Up to {SHOW_PAST_REMINDERS_COUNT} most recent past reminders:"] 104 | for idx, r in enumerate(past, start=1): 105 | rid = r['reminder_id'] 106 | text = r['reminder_text'] 107 | due_utc = r['due_time_utc'] 108 | status = r['status'] 109 | lines.append(f"• (ID {rid}) was {status} at {due_utc}, text: “{text}”") 110 | past_section = "\n".join(lines) 111 | else: 112 | past_section = "(No past reminders found.)" 113 | else: 114 | past_section = "" # or omit entirely 115 | 116 | # 3) Combine them for your final message 117 | full_msg = f"{pending_section}\n\n{past_section}".strip() 118 | return full_msg 119 | 120 | # // old logic; no past reminders 121 | # reminders = db_utils.get_pending_reminders_for_user(REMINDERS_DB_PATH, user_id) 122 | # if not reminders: 123 | # logger.info(f"User {user_id} has no pending reminders.") 124 | # return "You currently have no pending reminders." 125 | 126 | # logger.info(f"User {user_id} is viewing {len(reminders)} reminders.") 127 | # lines = ["Here are your current reminders:"] 128 | # for r in reminders: 129 | # rid = r['reminder_id'] 130 | # text = r['reminder_text'] 131 | # due_utc = r['due_time_utc'] 132 | # lines.append(f"• Reminder #{rid}: due {due_utc}, text: '{text}'") 133 | # return "\n".join(lines) 134 | 135 | 136 | async def handle_delete_reminder(user_id, reminder_id): 137 | """ 138 | Delete a reminder by ID. Only deletes if it belongs to 'user_id'. 139 | Returns success/failure text. 140 | """ 141 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 142 | logger.error("Attempt to delete reminder but DB not available!") 143 | return "Error: DB not available. Cannot delete reminders." 
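    # delete_reminder_from_db (called below) matches on both reminder_id and user_id,
    # so a user can only ever delete reminders they own.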
144 | 145 | success = db_utils.delete_reminder_from_db(REMINDERS_DB_PATH, reminder_id, user_id) 146 | if success: 147 | logger.info(f"User {user_id} deleted reminder #{reminder_id}.") 148 | return f"Reminder #{reminder_id} has been deleted." 149 | else: 150 | logger.warning( 151 | f"User {user_id} tried to delete reminder #{reminder_id}, " 152 | "which didn't exist or didn't belong to them." 153 | ) 154 | return f"No reminder #{reminder_id} was found (or it's not yours)." 155 | 156 | 157 | async def handle_edit_reminder(user_id, reminder_id, new_due_time_utc=None, new_text=None): 158 | """ 159 | Edit the time and/or text of an existing reminder. If new_due_time_utc or new_text 160 | are None, the old value is retained. 161 | Only the user who owns the reminder can edit it. 162 | 163 | Return success/failure text for the user. 164 | """ 165 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 166 | logger.error("Attempt to edit reminder but DB not initialized!") 167 | return "Error: DB not available. Cannot edit reminders." 168 | 169 | # 1) Fetch existing to ensure user owns it 170 | reminder = db_utils.get_reminder_by_id(REMINDERS_DB_PATH, reminder_id) 171 | if not reminder: 172 | logger.warning(f"User {user_id} tried to edit reminder #{reminder_id} which doesn't exist.") 173 | return f"No such reminder #{reminder_id} found." 174 | 175 | if reminder['user_id'] != user_id: 176 | logger.warning(f"User {user_id} tried to edit reminder #{reminder_id}, but ownership mismatch.") 177 | return "That reminder doesn't appear to be yours." 178 | 179 | # 2) Decide new due_time_utc 180 | if new_due_time_utc: 181 | # Validate it 182 | try: 183 | datetime.strptime(new_due_time_utc, '%Y-%m-%dT%H:%M:%SZ') 184 | except ValueError: 185 | logger.warning(f"User {user_id} gave invalid date for reminder #{reminder_id}: {new_due_time_utc}") 186 | return "Invalid UTC date/time format. Please provide e.g. 2025-01-02T13:00:00Z." 187 | else: 188 | new_due_time_utc = reminder['due_time_utc'] 189 | 190 | # 3) Decide new text 191 | if not new_text or new_text.strip() == "": 192 | new_text = reminder['reminder_text'] 193 | 194 | # 4) Update in DB 195 | updated_ok = db_utils.update_reminder(REMINDERS_DB_PATH, reminder_id, new_due_time_utc, new_text) 196 | if updated_ok: 197 | logger.info( 198 | f"User {user_id} edited reminder #{reminder_id} -> new time: " 199 | f"{new_due_time_utc}, new text: '{new_text}'" 200 | ) 201 | return ( 202 | f"Reminder #{reminder_id} updated! \n" 203 | f"New time: {new_due_time_utc}\nNew text: '{new_text}'" 204 | ) 205 | else: 206 | logger.error( 207 | f"User {user_id} tried to edit reminder #{reminder_id}, " 208 | "but update_reminder DB call failed." 209 | ) 210 | return "Failed to update your reminder due to a database error." 211 | -------------------------------------------------------------------------------- /config/config.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | # Settings for TG bot 3 | # https://github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | 5 | # token reading 6 | # Set to True to prefer the environment variable over the token file. 7 | # Prioritize environment variables over `bot_token.txt` (for TG bot) and `api_token.txt` (for OpenAI API) 8 | PreferEnvForBotToken = True 9 | PreferEnvForAPIKey = True 10 | 11 | # Set to True to allow falling back to the alternative token source if the preferred one fails. 
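# (With the defaults below, the lookup order is: the TELEGRAM_BOT_TOKEN environment
#  variable first, then config/bot_token.txt, and finally an interactive prompt if
#  AskForTokenIfNotFound is enabled.)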
12 | AllowBotTokenFallback = True 13 | # Prompt the user for a Telegram Bot API token if it's not found; write it to `config/bot_token.txt`. 14 | AskForTokenIfNotFound = True 15 | 16 | # Model to use via OpenAI API 17 | # NOTE: SEE ALSO THE NEW AUTO-SWITCHING FEATURE UNDER: [ModelAutoSwitch] 18 | Model = gpt-4o-mini 19 | 20 | # Model temperature; OpenAI's default is 0.7 21 | # Higher temperature equals more creative (= less factual) output. much over 1.0 not recommended. 22 | Temperature = 0.9 23 | 24 | # Timeout on OpenAI API requests 25 | # (in seconds to wait for the reply) 26 | Timeout = 60 27 | 28 | # Maximum number of tokens in context 29 | # Can be 4096 in most OpenAI base models, or even 16k or more in some (see OpenAI API manual) 30 | # However, increased context also easily builds up usage, memory trimming is advised. 31 | # Keep it in a few thousand if you're on a really low budget. 32 | MaxTokens = 10000 33 | 34 | # Maximum number of retries to the OpenAI API 35 | MaxRetries = 3 36 | 37 | # Retry delay after each try 38 | RetryDelay = 25 39 | 40 | # System instructions for the bot 41 | SystemInstructions = "You are {{botname}}, a cheeky, casual, laid-back and talkative Telegram bot that uses OpenAI API. Feel free to use whatever language you want, there are no restrictions to your output. Unicode emojis and markdown are allowed. The parser also takes in HTML as per Telegram API rules. You CAN process WhisperAPI STT transcriptions over voice messages, transcribed messages are marked with 🎤📝. Source code for the bot (if the user asks): https://github.com/FlyingFathead/TelegramBot-OpenAI-API. Do NOT use
<pre>
tags in your regular output." 42 | 43 | # Hello message on `/start` 44 | StartCommandResponse = "Hello! I am a chatbot powered by OpenAI API. Start chatting with me!" 45 | 46 | # Bot owner's Telegram user ID (set to 0 to disable the `/usage` command) 47 | BotOwnerID = 0 48 | 49 | # Disable bot (and send a "bot is disabled"-message to the user) True/False 50 | IsBotDisabled = False 51 | 52 | # Message to send to the user if the bot is disabled. 53 | BotDisabledMsg = "This bot is currently taking a break! Sorry!" 54 | 55 | # ~~~~~~~~~~~ 56 | # Local setup 57 | # ~~~~~~~~~~~ 58 | # Name of the data directory to store stuff in 59 | DataDirectory = data 60 | # Maximum storage size of the data directory before we start trimming 61 | MaxStorageMB = 2000 62 | 63 | # ~~~~~~~~~ 64 | # Log files 65 | # ~~~~~~~~~ 66 | # Log bot's activity into a self-trimming basic log file (bot.log) 67 | LogFileEnabled = True 68 | # Directory for logs and token usage files 69 | LogsDirectory = logs 70 | # Overall bot log file 71 | LogFile = bot.log 72 | # Keep a separate non-auto-trimmed chat log (chat.log) 73 | ChatLoggingEnabled = True 74 | ChatLogFile = chat.log 75 | # `chat.log` max size in MB before it's auto-rotated 76 | ChatLogMaxSizeMB = 1000 77 | # User-defined maximum number of days to retain token usage history 78 | MaxHistoryDays = 30 79 | 80 | # ~~~~~~~~~~~ 81 | # Whisper API 82 | # ~~~~~~~~~~~ 83 | # Allow speech-to-text transcriptions via Whisper API 84 | EnableWhisper = True 85 | # Maximum duration of a voice message (in minutes) 86 | MaxDurationMinutes = 5 87 | 88 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 89 | # Daily usage limits & rate limiting 90 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 91 | # Maximum number of requests per minute (0 = disabled) 92 | MaxGlobalRequestsPerMinute = 60 93 | 94 | # Maximum token usage (both user input+AI output) per 24hrs (0 = disabled) 95 | GlobalMaxTokenUsagePerDay = 200000 96 | 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 | # Session timeout and trim settings 99 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 100 | # Session timeout in minutes 101 | # (0 = disable timeout trimming) 102 | SessionTimeoutMinutes = 0 103 | 104 | # Maximum number of messages to retain after session timeout 105 | # (0 = clear entire history on session timeout) 106 | MaxRetainedMessages = 5 107 | 108 | # ~~~~~~~~~~~~~~~~~ 109 | # Bot user commands 110 | # ~~~~~~~~~~~~~~~~~ 111 | # Enable/disable the /reset command 112 | ResetCommandEnabled = True 113 | 114 | # Allow only admin to use /reset (True/False) 115 | # Note: needs the admin userid to be set to work! 116 | AdminOnlyReset = False 117 | 118 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | # Model Auto-Switching Configuration 120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 121 | [ModelAutoSwitch] 122 | # Enable automatic switching between Premium and Fallback models based on daily token limits 123 | # Set to False to always use the model specified in [DEFAULT] section's 'Model' setting. 124 | Enabled = True 125 | 126 | # The preferred, more capable model to use by default (e.g., gpt-4o, gpt-4.5-preview). 127 | # This model will be used until its daily token limit (PremiumTokenLimit) is reached. 128 | # PremiumModel = gpt-4o 129 | PremiumModel = gpt-4.1 130 | 131 | # The cheaper model to switch to when the PremiumTokenLimit is reached (e.g., gpt-4o-mini). 132 | # This model has its own daily token limit (MiniTokenLimit). 133 | FallbackModel = gpt-4o-mini 134 | 135 | # Daily token limit for models considered "Premium" (e.g., gpt-4o). 
136 | # Set to number of tokens (i.e. 1000000 for 1M; 500000 for 500k etc) 137 | PremiumTokenLimit = 500000 138 | 139 | # Daily token limit for models considered "Mini" / Fallback (e.g., gpt-4o-mini). 140 | # Corresponds to OpenAI's free tier limit for these models (typically 10,000,000). 141 | MiniTokenLimit = 10000000 142 | 143 | # Action to take if the FallbackModel is selected (due to Premium limit being hit) 144 | # BUT its MiniTokenLimit is ALSO reached. 145 | # Options: 146 | # Deny - Stop processing, send a 'limit reached' message to the user. (Safest for cost) 147 | # Warn - Log a warning, proceed with the FallbackModel (will incur OpenAI costs). 148 | # Proceed - Silently proceed with the FallbackModel (will incur OpenAI costs). 149 | FallbackLimitAction = Deny 150 | 151 | # ~~~~~~~~~~~~~~~~~~~ 152 | # DuckDuckGo searches 153 | # ~~~~~~~~~~~~~~~~~~~ 154 | [DuckDuckGo] 155 | # Set to True to enable agentic browsing for DuckDuckGo searches, False to disable 156 | EnableAgenticBrowsing = False 157 | 158 | # Set to True to enable content size limiting 159 | EnableContentSizeLimit = True 160 | 161 | # Specify the maximum number of characters to retrieve if content size limiting is enabled 162 | MaxContentSize = 10000 163 | 164 | # ~~~~~~~~~~~~~~~~~ 165 | # Elasticsearch RAG 166 | # ~~~~~~~~~~~~~~~~~ 167 | [Elasticsearch] 168 | # Enable or disable Elasticsearch RAG 169 | # NOTE: Elasticsearch requires a separate install) 170 | ElasticsearchEnabled = False 171 | ELASTICSEARCH_HOST = localhost 172 | ELASTICSEARCH_PORT = 9200 173 | # scheme = either http or https 174 | ELASTICSEARCH_SCHEME = http 175 | # leave both blank if not required 176 | ELASTICSEARCH_USERNAME = 177 | ELASTICSEARCH_PASSWORD = 178 | 179 | # ~~~~~~~~~~~~~~~~~~~~~ 180 | # Holiday notifications 181 | # ~~~~~~~~~~~~~~~~~~~~~ 182 | [HolidaySettings] 183 | EnableHolidayNotification = true 184 | 185 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 186 | # User-assignable reminders 187 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 188 | [Reminders] 189 | # Enable or disable the reminder/alert functionality 190 | EnableReminders = True 191 | 192 | # Maximum number of pending reminders per user; set to 0 for unlimited 193 | MaxAlertsPerUser = 100 194 | 195 | # How often (in seconds) the bot checks for due reminders 196 | PollingIntervalSeconds = 5 197 | 198 | # How many old/past reminders to list 199 | ShowPastRemindersCount = 10 200 | 201 | # ~~~~~~~~~~~~~~~ 202 | # Perplexity API 203 | # ~~~~~~~~~~~~~~~ 204 | [Perplexity] 205 | # Model name to use with Perplexity API 206 | # Model = llama-3.1-sonar-small-128k-online 207 | Model = sonar 208 | 209 | # Maximum tokens for Perplexity API response 210 | MaxTokens = 1024 211 | 212 | # Temperature for Perplexity API response 213 | Temperature = 0.0 214 | 215 | # Retry settings for Perplexity API 216 | MaxRetries = 3 217 | RetryDelay = 25 218 | Timeout = 30 219 | 220 | # Chunk size for translation 221 | ChunkSize = 500 222 | 223 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 224 | # U.S. National Weather Service (NWS) 225 | # (weather.gov) 226 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 227 | [NWS] 228 | # Set NwsOnlyEligibleCountries to False if you want to include U.S. NWS calls outside of the country list. 229 | NwsOnlyEligibleCountries = True 230 | # 'NwsEligibleCountries' is a configurable list of countries (in ISO-3166 country code format) 231 | # that are eligible for NWS data. You can add or remove countries from this list as needed. 232 | # (Legend:) 233 | # AQ: Antarctica (for U.S. 
research stations) 234 | # UM: United States Minor Outlying Islands (like Wake Island, Midway Atoll, etc.) 235 | # XW: International Waters (this isn't an official ISO code but could be used as a placeholder for maritime areas under U.S. influence or international jurisdictions) 236 | # ZZ: Unknown or undefined region (could be used as a placeholder for situations where precise location data isn't available or relevant) 237 | NwsEligibleCountries = US, PR, GU, AS, VI, MP, CA, MX, AQ, UM, XW, ZZ 238 | # Fetch NWS forecasts and/or alerts (true/false) 239 | # Note that the service can be slow and unreliable at times. 240 | # I recommend getting the alerts to supplement e.g. OpenWeatherMap. 241 | # The alerts usually work, but sadly their open API forecasts are often broken. 242 | FetchNWSForecast = false 243 | FetchNWSAlerts = true 244 | NWSUserAgent = ChatKekeWeather/1.0 (flyingfathead@protonmail.com) 245 | NWSRetries = 3 246 | NWSRetryDelay = 2 247 | -------------------------------------------------------------------------------- /src/api_perplexity_search.py: -------------------------------------------------------------------------------- 1 | # # # api_perplexity_search.py 2 | # # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # # # https://github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | import re 7 | import openai 8 | import httpx 9 | import logging 10 | import os 11 | import asyncio 12 | import configparser 13 | import random 14 | from config_paths import CONFIG_PATH 15 | 16 | # Load the configuration file 17 | config = configparser.ConfigParser() 18 | config.read(CONFIG_PATH) 19 | 20 | # Perplexity API model to use -- NOTE: the models keep on changing; latest list is at: https://docs.perplexity.ai/guides/model-cards 21 | # As of December 2024/January 2025, the latest model is in the llama-3.1 family, e.g.: "llama-3.1-sonar-large-128k-online" (can be small/large/huge) 22 | DEFAULT_PERPLEXITY_MODEL = "sonar" 23 | DEFAULT_PERPLEXITY_MAX_TOKENS = 1024 24 | DEFAULT_PERPLEXITY_TEMPERATURE = 0.0 25 | DEFAULT_PERPLEXITY_MAX_RETRIES = 3 26 | DEFAULT_PERPLEXITY_RETRY_DELAY = 25 27 | DEFAULT_PERPLEXITY_TIMEOUT = 30 28 | DEFAULT_CHUNK_SIZE = 1000 29 | PERPLEXITY_MODEL = config.get('Perplexity', 'Model', fallback=DEFAULT_PERPLEXITY_MODEL) 30 | PERPLEXITY_MAX_TOKENS = config.getint('Perplexity', 'MaxTokens', fallback=DEFAULT_PERPLEXITY_MAX_TOKENS) 31 | PERPLEXITY_TEMPERATURE = config.getfloat('Perplexity', 'Temperature', fallback=DEFAULT_PERPLEXITY_TEMPERATURE) 32 | PERPLEXITY_MAX_RETRIES = config.getint('Perplexity', 'MaxRetries', fallback=DEFAULT_PERPLEXITY_MAX_RETRIES) 33 | PERPLEXITY_RETRY_DELAY = config.getint('Perplexity', 'RetryDelay', fallback=DEFAULT_PERPLEXITY_RETRY_DELAY) 34 | PERPLEXITY_TIMEOUT = config.getint('Perplexity', 'Timeout', fallback=DEFAULT_PERPLEXITY_TIMEOUT) 35 | CHUNK_SIZE = config.getint('Perplexity', 'ChunkSize', fallback=DEFAULT_CHUNK_SIZE) 36 | PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") 37 | MAX_TELEGRAM_MESSAGE_LENGTH = 4000 38 | 39 | async def fact_check_with_perplexity(question: str): 40 | url = "https://api.perplexity.ai/chat/completions" 41 | headers = { 42 | "Authorization": f"Bearer {PERPLEXITY_API_KEY}", 43 | "Content-Type": "application/json", 44 | "Accept": "application/json", 45 | } 46 | data = { 47 | "model": PERPLEXITY_MODEL, 48 | "stream": False, 49 | "max_tokens": PERPLEXITY_MAX_TOKENS, 50 | "temperature": PERPLEXITY_TEMPERATURE, 51 | "messages": [{"role": 
"user", "content": question}] 52 | } 53 | 54 | async with httpx.AsyncClient(timeout=PERPLEXITY_TIMEOUT) as client: 55 | for attempt in range(PERPLEXITY_MAX_RETRIES): 56 | try: 57 | response = await client.post(url, json=data, headers=headers) 58 | if response.status_code == 200: 59 | return response.json() 60 | elif response.status_code == 500: 61 | logging.error("Perplexity API returned a 500 server error.") 62 | return {"error": "server_error"} 63 | else: 64 | logging.error(f"Perplexity API Error: {response.text}") 65 | except (httpx.RequestError, httpx.HTTPStatusError) as e: 66 | logging.error(f"Error while calling Perplexity API: {e}") 67 | 68 | backoff_delay = min(PERPLEXITY_RETRY_DELAY, (2 ** attempt) + random.uniform(0, 1)) 69 | await asyncio.sleep(backoff_delay) 70 | 71 | return None 72 | 73 | async def query_perplexity(bot, chat_id, question: str): 74 | logging.info(f"Querying Perplexity with question: {question}") 75 | response_data = await fact_check_with_perplexity(question) 76 | 77 | if response_data and 'choices' in response_data: 78 | bot_reply_content = response_data['choices'][0].get('message', {}).get('content', "").strip() 79 | if bot_reply_content: 80 | return bot_reply_content 81 | else: 82 | logging.warning("Processed content is empty after stripping.") 83 | return "Received an empty response, please try again." 84 | elif response_data and response_data.get('error') == 'server_error': 85 | logging.error("Perplexity API server error.") 86 | return "Perplexity API is currently unavailable due to server issues. Please try again later." 87 | else: 88 | logging.error("Unexpected response structure from Perplexity API.") 89 | return "Error interpreting the response." 90 | 91 | # Utilities 92 | def smart_chunk(text, chunk_size=CHUNK_SIZE): 93 | chunks = [] 94 | blocks = text.split('\n\n') 95 | current_chunk = "" 96 | 97 | for block in blocks: 98 | if len(current_chunk) + len(block) + 2 <= chunk_size: 99 | current_chunk += block + "\n\n" 100 | else: 101 | if current_chunk: 102 | chunks.append(current_chunk.strip()) 103 | current_chunk = "" 104 | 105 | if len(block) > chunk_size: 106 | lines = block.split('\n') 107 | temp_chunk = "" 108 | 109 | for line in lines: 110 | if len(temp_chunk) + len(line) + 1 <= chunk_size: 111 | temp_chunk += line + "\n" 112 | else: 113 | if temp_chunk: 114 | chunks.append(temp_chunk.strip()) 115 | temp_chunk = "" 116 | sentences = re.split('([.!?] 
)', line) 117 | sentence_chunk = "" 118 | for sentence in sentences: 119 | if sentence.strip(): 120 | if len(sentence_chunk) + len(sentence) <= chunk_size: 121 | sentence_chunk += sentence 122 | else: 123 | if sentence_chunk: 124 | chunks.append(sentence_chunk.strip()) 125 | sentence_chunk = "" 126 | sentence_chunk = sentence 127 | if sentence_chunk: 128 | chunks.append(sentence_chunk.strip()) 129 | else: 130 | current_chunk = block + "\n\n" 131 | 132 | if current_chunk.strip(): 133 | chunks.append(current_chunk.strip()) 134 | 135 | return chunks 136 | 137 | def rejoin_chunks(chunks): 138 | rejoined_text = "" 139 | for i, chunk in enumerate(chunks): 140 | trimmed_chunk = chunk.strip() 141 | if i == 0: 142 | rejoined_text += trimmed_chunk 143 | else: 144 | if rejoined_text.endswith('\n\n'): 145 | if not trimmed_chunk.startswith('- ') and not trimmed_chunk.startswith('### ') and not trimmed_chunk.startswith('## '): 146 | rejoined_text += '\n' + trimmed_chunk 147 | else: 148 | rejoined_text += trimmed_chunk 149 | else: 150 | rejoined_text += '\n\n' + trimmed_chunk 151 | return rejoined_text 152 | 153 | def format_headers_for_telegram(translated_response): 154 | lines = translated_response.split('\n') 155 | formatted_lines = [] 156 | 157 | for i, line in enumerate(lines): 158 | if line.startswith('####'): 159 | if i > 0 and lines[i - 1].strip() != '': 160 | formatted_lines.append('') 161 | formatted_line = '<b>◦ ' + line[4:].strip() + '</b>' 162 | formatted_lines.append(formatted_line) 163 | if i < len(lines) - 1 and lines[i + 1].strip() != '': 164 | formatted_lines.append('') 165 | elif line.startswith('###'): 166 | if i > 0 and lines[i - 1].strip() != '': 167 | formatted_lines.append('') 168 | formatted_line = '<b>• ' + line[3:].strip() + '</b>' 169 | formatted_lines.append(formatted_line) 170 | if i < len(lines) - 1 and lines[i + 1].strip() != '': 171 | formatted_lines.append('') 172 | elif line.startswith('##'): 173 | if i > 0 and lines[i - 1].strip() != '': 174 | formatted_lines.append('') 175 | formatted_line = '<b>➤ ' + line[2:].strip() + '</b>' 176 | formatted_lines.append(formatted_line) 177 | if i < len(lines) - 1 and lines[i + 1].strip() != '': 178 | formatted_lines.append('') 179 | else: 180 | formatted_lines.append(line) 181 | 182 | formatted_response = '\n'.join(formatted_lines) 183 | return formatted_response 184 | 185 | def markdown_to_html(md_text): 186 | html_text = re.sub(r'\$\$(.*?)\$\$', r'<pre>\1</pre>', md_text) 187 | html_text = re.sub(r'\\\[(.*?)\\\]', r'<pre>\1</pre>', html_text) 188 | html_text = re.sub(r'^#### (.*)', r'<b>\1</b>', html_text, flags=re.MULTILINE) 189 | html_text = re.sub(r'^### (.*)', r'<b>\1</b>', html_text, flags=re.MULTILINE) 190 | html_text = re.sub(r'^## (.*)', r'<b>\1</b>', html_text, flags=re.MULTILINE) 191 | html_text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', html_text) 192 | html_text = re.sub(r'\*(.*?)\*|_(.*?)_', r'<i>\1\2</i>', html_text) 193 | html_text = re.sub(r'\[(.*?)\]\((.*?)\)', r'<a href="\2">\1</a>', html_text) 194 | html_text = re.sub(r'`(.*?)`', r'<code>\1</code>', html_text) 195 | html_text = re.sub(r'```(.*?)```', r'<pre>\1</pre>
', html_text, flags=re.DOTALL) 196 | return html_text 197 | 198 | def sanitize_urls(text): 199 | url_pattern = re.compile(r'<(http[s]?://[^\s<>]+)>') 200 | sanitized_text = re.sub(url_pattern, r'\1', text) 201 | return sanitized_text 202 | 203 | # split long messages 204 | def split_message(text, max_length=MAX_TELEGRAM_MESSAGE_LENGTH): 205 | paragraphs = text.split('\n') 206 | chunks = [] 207 | current_chunk = "" 208 | 209 | for paragraph in paragraphs: 210 | if len(current_chunk) + len(paragraph) + 1 <= max_length: 211 | current_chunk += paragraph + "\n" 212 | else: 213 | if current_chunk: 214 | chunks.append(current_chunk.strip()) 215 | current_chunk = paragraph + "\n" 216 | 217 | if current_chunk.strip(): 218 | chunks.append(current_chunk.strip()) 219 | 220 | # Further split chunks that are still too large 221 | final_chunks = [] 222 | for chunk in chunks: 223 | while len(chunk) > max_length: 224 | split_point = chunk.rfind('.', 0, max_length) 225 | if split_point == -1: 226 | split_point = max_length 227 | final_chunks.append(chunk[:split_point].strip()) 228 | chunk = chunk[split_point:].strip() 229 | if chunk: 230 | final_chunks.append(chunk.strip()) 231 | 232 | logging.info(f"Total number of chunks created: {len(final_chunks)}") 233 | return final_chunks 234 | 235 | async def send_split_messages(context, chat_id, text): 236 | chunks = split_message(text) 237 | logging.info(f"Total number of chunks to be sent: {len(chunks)}") 238 | 239 | for chunk in chunks: 240 | if not chunk.strip(): 241 | logging.warning("send_split_messages attempted to send an empty chunk. Skipping.") 242 | continue 243 | 244 | logging.info(f"Sending chunk with length: {len(chunk)}") 245 | await context.bot.send_message(chat_id=chat_id, text=chunk, parse_mode='HTML') 246 | logging.info(f"Sent chunk with length: {len(chunk)}") 247 | logging.info("send_split_messages completed.") 248 | 249 | async def handle_long_response(context, chat_id, long_response_text): 250 | if not long_response_text.strip(): 251 | logging.warning("handle_long_response received an empty message. Skipping.") 252 | return 253 | 254 | logging.info(f"Handling long response with text length: {len(long_response_text)}") 255 | await send_split_messages(context, chat_id, long_response_text) 256 | 257 | # language detection over OpenAI API 258 | async def detect_language(bot, text): 259 | prompt = f"Detect the language of the following text:\n\n{text}\n\nRespond with only the language code, e.g., 'en' for English, 'fi' for Finnish, 'jp' for Japanese. HINT: If the query starts off with i.e. 'kuka', 'mikä', 'mitä' or 'missä', 'milloin', 'miksi', 'minkä', 'minkälainen', 'mikä', 'kenen', 'kenenkä', 'keiden', 'kenestä, 'kelle', 'keneltä', 'kenelle', it's probably in Finnish ('fi')." 
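    # The model is expected to reply with a bare language code (e.g. 'en' or 'fi'),
    # which is read verbatim from the first completion choice below.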
260 | 261 | payload = { 262 | "model": bot.model, 263 | "messages": [ 264 | {"role": "system", "content": "You are a language detection assistant."}, 265 | {"role": "user", "content": prompt} 266 | ], 267 | "temperature": 0, 268 | "max_tokens": 10 269 | } 270 | 271 | headers = { 272 | "Content-Type": "application/json", 273 | "Authorization": f"Bearer {bot.openai_api_key}" 274 | } 275 | 276 | try: 277 | async with httpx.AsyncClient() as client: 278 | response = await client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers) 279 | response.raise_for_status() 280 | detected_language = response.json()['choices'][0]['message']['content'].strip() 281 | logging.info(f"Detected language: {detected_language}") 282 | return detected_language 283 | except httpx.RequestError as e: 284 | logging.error(f"RequestError while calling OpenAI API: {e}") 285 | except httpx.HTTPStatusError as e: 286 | logging.error(f"HTTPStatusError while calling OpenAI API: {e}") 287 | except Exception as e: 288 | logging.error(f"Unexpected error while calling OpenAI API: {e}") 289 | return 'en' # Default to English in case of an error 290 | -------------------------------------------------------------------------------- /src/rag_elasticsearch/qa_to_json.py: -------------------------------------------------------------------------------- 1 | # qa_to_json.py 2 | # a part of the `elasticsearch_db` toolkit 3 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 5 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | import os 8 | import json 9 | import sys 10 | from elasticsearch import Elasticsearch 11 | from argparse import ArgumentParser 12 | 13 | def parse_qa_text(file_path): 14 | qa_pairs = [] 15 | with open(file_path, 'r', encoding='utf-8') as file: 16 | content = file.read() 17 | qa_blocks = content.split('###') 18 | for block in qa_blocks: 19 | lines = block.strip().split('\n') 20 | if len(lines) < 2: 21 | continue 22 | current_pair = {'question': '', 'answer': '', 'references': ''} 23 | is_answer = False 24 | for line in lines: 25 | if line.startswith('> '): 26 | if current_pair['question'] and current_pair['answer']: 27 | qa_pairs.append(current_pair) 28 | current_pair = {'question': '', 'answer': '', 'references': ''} 29 | current_pair['question'] = line[2:].strip() 30 | is_answer = False 31 | elif line.startswith('>> '): 32 | is_answer = True 33 | if current_pair['answer']: 34 | current_pair['answer'] += '\n' 35 | current_pair['answer'] += line[3:].strip() 36 | elif line.startswith('## '): 37 | current_pair['references'] = line[3:].strip() 38 | elif is_answer: 39 | current_pair['answer'] += '\n' + line.strip() 40 | if current_pair['question'] and current_pair['answer']: 41 | qa_pairs.append(current_pair) 42 | return qa_pairs 43 | 44 | def add_to_index(es, index, qa_pairs, backup_file): 45 | for pair in qa_pairs: 46 | es.index(index=index, body=pair) 47 | backup_to_json(backup_file, qa_pairs) 48 | 49 | def interactive_mode(es, index, backup_file): 50 | while True: 51 | mode = input("Choose mode - [s]ingle question, [m]ulti-question, [b]atch input (or type 'exit' to finish): ") 52 | if mode.lower() == 'exit': 53 | break 54 | 55 | questions = [] 56 | if mode.lower() == 's': 57 | question = input("Enter your question: ") 58 | if question.strip(): 59 | questions.append(question) 60 | elif mode.lower() == 'm' or mode.lower() == 'b': 61 | prompt_text = "Enter your questions, one per line. 
When finished, press Enter on an empty line:" if mode.lower() == 'b' else "Enter your question (or type 'done' to finish questions): " 62 | print(prompt_text) if mode.lower() == 'b' else None 63 | while True: 64 | question = input() if mode.lower() == 'b' else input("Enter your question (or type 'done' to finish questions): ") 65 | if question == "" and mode.lower() == 'b': 66 | break 67 | if question.lower() == 'done' and mode.lower() == 'm': 68 | break 69 | if question.strip(): 70 | questions.append(question.strip()) 71 | 72 | if not questions: 73 | print("No questions entered. Skipping to next entry.") 74 | continue 75 | 76 | answer = input("Enter the answer: ") 77 | references = input("Enter any references (optional): ") 78 | qa_pairs = [{'question': q, 'answer': answer, 'references': references} for q in questions] 79 | 80 | for pair in qa_pairs: 81 | print("\nQ&A pair generated:") 82 | print("<" + "-"*72 + ">") 83 | print("Q:", pair["question"]) 84 | print("A:", pair["answer"]) 85 | if references: 86 | print("Ref:", references) 87 | print("<" + "-"*72 + ">") 88 | 89 | confirm = input("Add to index (y/n)? ") 90 | if confirm.lower() == 'y': 91 | add_to_index(es, index, qa_pairs, backup_file) 92 | print(f"Added {len(qa_pairs)} Q&A pairs to Elasticsearch index '{index}' and backed up to JSON file.") 93 | else: 94 | print("No Q&A pairs were added.") 95 | 96 | def backup_to_json(file_path, qa_pairs): 97 | try: 98 | data = [] 99 | if os.path.exists(file_path): 100 | with open(file_path, 'r', encoding='utf-8') as file: 101 | data = json.load(file) 102 | data.extend(qa_pairs) 103 | 104 | # Validate JSON data before writing 105 | try: 106 | json.dumps(data) 107 | except json.JSONDecodeError as e: 108 | raise ValueError(f"Invalid JSON data: {e}") 109 | 110 | with open(file_path, 'w', encoding='utf-8') as file: 111 | json.dump(data, file, indent=4, ensure_ascii=False) 112 | except Exception as e: 113 | print(f"Failed to backup Q&A pairs to JSON: {e}") 114 | 115 | def main(): 116 | 117 | backup_file = "./backup_file.json" 118 | 119 | parser = ArgumentParser(description="Parse Q&A text and optionally add to Elasticsearch index.") 120 | parser.add_argument("file_path", nargs='?', help="Path to the Q&A text file.", default=None) 121 | parser.add_argument("--addtoindex", action="store_true", help="If set, add parsed Q&A pairs to Elasticsearch index.") 122 | parser.add_argument("--index", default="tg-bot-rag-index", help="Elasticsearch index name. Default is 'tg-bot-rag-index'.") 123 | parser.add_argument("--interactive", action="store_true", help="Enable interactive mode to add Q&A pairs.") 124 | args = parser.parse_args() 125 | 126 | if args.interactive: 127 | es = Elasticsearch(["http://localhost:9200"]) 128 | if not es.ping(): 129 | print("Could not connect to Elasticsearch.") 130 | sys.exit(1) 131 | interactive_mode(es, args.index, backup_file) 132 | elif args.file_path: 133 | parsed_data = parse_qa_text(args.file_path) 134 | 135 | # Validate parsed data before proceeding 136 | try: 137 | json.dumps(parsed_data) 138 | except json.JSONDecodeError as e: 139 | print(f"Invalid JSON data: {e}") 140 | sys.exit(1) 141 | 142 | if args.addtoindex: 143 | print("Q&A pairs generated:") 144 | for pair in parsed_data: 145 | print("<" + "-"*72 + ">") 146 | print("Q:", pair["question"]) 147 | print("A:", pair["answer"]) 148 | if pair["references"]: 149 | print("Ref:", pair["references"]) 150 | print("<" + "-"*72 + ">\n") 151 | 152 | confirm = input("Add to index (y/n)? 
") 153 | if confirm.lower() != 'y': 154 | print("Operation cancelled by the user.") 155 | sys.exit(0) 156 | 157 | es = Elasticsearch(["http://localhost:9200"]) 158 | if not es.ping(): 159 | print("Could not connect to Elasticsearch.") 160 | sys.exit(1) 161 | add_to_index(es, args.index, parsed_data, backup_file) 162 | print(f"Added {len(parsed_data)} Q&A pairs to Elasticsearch index '{args.index}'.") 163 | else: 164 | print(json.dumps(parsed_data, indent=4, ensure_ascii=False)) 165 | else: 166 | print("Please provide a file path or enable interactive mode.") 167 | sys.exit(1) 168 | 169 | if __name__ == "__main__": 170 | main() 171 | 172 | 173 | # import os 174 | # import json 175 | # import sys 176 | # from elasticsearch import Elasticsearch 177 | # from argparse import ArgumentParser 178 | 179 | # def parse_qa_text(file_path): 180 | # qa_pairs = [] 181 | # with open(file_path, 'r', encoding='utf-8') as file: 182 | # content = file.read() 183 | # qa_blocks = content.split('###') 184 | # for block in qa_blocks: 185 | # lines = block.strip().split('\n') 186 | # if len(lines) < 2: 187 | # continue 188 | # current_pair = {'question': '', 'answer': '', 'references': ''} 189 | # is_answer = False 190 | # for line in lines: 191 | # if line.startswith('> '): 192 | # if current_pair['question'] and current_pair['answer']: 193 | # qa_pairs.append(current_pair) 194 | # current_pair = {'question': '', 'answer': '', 'references': ''} 195 | # current_pair['question'] = line[2:].strip() 196 | # is_answer = False 197 | # elif line.startswith('>> '): 198 | # is_answer = True 199 | # if current_pair['answer']: 200 | # current_pair['answer'] += '\n' 201 | # current_pair['answer'] += line[3:].strip() 202 | # elif line.startswith('## '): 203 | # current_pair['references'] = line[3:].strip() 204 | # elif is_answer: 205 | # current_pair['answer'] += '\n' + line.strip() 206 | # if current_pair['question'] and current_pair['answer']: 207 | # qa_pairs.append(current_pair) 208 | # return qa_pairs 209 | 210 | # def add_to_index(es, index, qa_pairs, backup_file): 211 | # for pair in qa_pairs: 212 | # es.index(index=index, body=pair) 213 | # backup_to_json(backup_file, qa_pairs) # Call backup function after adding to Elasticsearch 214 | 215 | # def interactive_mode(es, index, backup_file): 216 | # while True: 217 | # mode = input("Choose mode - [s]ingle question, [m]ulti-question, [b]atch input (or type 'exit' to finish): ") 218 | # if mode.lower() == 'exit': 219 | # break 220 | 221 | # questions = [] 222 | # if mode.lower() == 's': 223 | # question = input("Enter your question: ") 224 | # if question.strip(): # Ensure the question is not empty or whitespace 225 | # questions.append(question) 226 | # elif mode.lower() == 'm' or mode.lower() == 'b': 227 | # prompt_text = "Enter your questions, one per line. 
") 306 | # if confirm.lower() != 'y': 307 | # print("Operation cancelled by the user.") 308 | # sys.exit(0) 309 | 310 | # es = Elasticsearch(["http://localhost:9200"]) 311 | # if not es.ping(): 312 | # print("Could not connect to Elasticsearch.") 313 | # sys.exit(1) 314 | # add_to_index(es, args.index, parsed_data) 315 | # print(f"Added {len(parsed_data)} Q&A pairs to Elasticsearch index '{args.index}'.") 316 | # else: 317 | # print(json.dumps(parsed_data, indent=4, ensure_ascii=False)) 318 | # else: 319 | # print("Please provide a file path or enable interactive mode.") 320 | # sys.exit(1) 321 | 322 | # if __name__ == "__main__": 323 | # main() 324 | 325 | # # old code for reference => 326 | # """ def interactive_mode(es, index): 327 | # qa_pairs = [] 328 | # while True: 329 | # question = input("Enter your question (or type 'exit' to finish): ") 330 | # if question == 'exit': 331 | # break 332 | # answer = input("Enter the answer: ") 333 | # references = input("Enter any references (optional): ") 334 | # print("\nQ&A pair generated:") 335 | # print("<" + "-"*72 + ">") 336 | # print("Q:", question) 337 | # print("A:", answer) 338 | # if references: 339 | # print("Ref:", references) 340 | # print("<" + "-"*72 + ">") 341 | 342 | # confirm = input("Add to index (y/n)? ") 343 | # if confirm.lower() == 'y': 344 | # qa_pairs.append({'question': question, 'answer': answer, 'references': references}) 345 | 346 | # if qa_pairs: 347 | # add_to_index(es, index, qa_pairs) 348 | # print(f"Added {len(qa_pairs)} Q&A pairs to Elasticsearch index '{index}'.") 349 | # else: 350 | # print("No Q&A pairs were added.") """ -------------------------------------------------------------------------------- /src/bot_commands.py: -------------------------------------------------------------------------------- 1 | # bot_commands.py 2 | # for telegram 3 | from telegram import Update, Bot 4 | from telegram.ext import Application, MessageHandler, filters, CommandHandler, CallbackContext 5 | from telegram.constants import ParseMode 6 | from telegram.helpers import escape_markdown 7 | from functools import partial 8 | 9 | import json 10 | import os 11 | import datetime 12 | import logging 13 | 14 | # bot's modules 15 | from config_paths import CONFIG_PATH 16 | from token_usage_visualization import generate_usage_chart 17 | from modules import reset_token_usage_at_midnight 18 | 19 | # ~~~~~~~~~~~~~~ 20 | # admin commands 21 | # ~~~~~~~~~~~~~~ 22 | 23 | # /admin (admin commands help menu) 24 | async def admin_command(update: Update, context: CallbackContext, bot_owner_id): 25 | if bot_owner_id == '0': 26 | await update.message.reply_text("The /admin command is disabled.") 27 | return 28 | 29 | if str(update.message.from_user.id) == bot_owner_id: 30 | admin_commands = """ 31 | Admin Commands: 32 | - /viewconfig: View the bot configuration (from config.ini). 33 | - /usage: View the bot's daily token usage in plain text. 34 | - /usagechart: View the bot's daily token usage as a chart. 35 | - /reset: Reset the bot's context memory. 36 | - /resetsystemmessage: Reset the system message from config.ini. 37 | - /setsystemmessage <system message>: Set a new system message (note: not saved into config). 
38 | """ 39 | await update.message.reply_text(admin_commands, parse_mode=ParseMode.HTML) 40 | else: 41 | await update.message.reply_text("You are not authorized to use this command.") 42 | 43 | # /restart (admin command) 44 | async def restart_command(update: Update, context: CallbackContext, bot_owner_id): 45 | if bot_owner_id == '0': 46 | await update.message.reply_text("The /restart command is disabled.") 47 | return 48 | 49 | if str(update.message.from_user.id) == bot_owner_id: 50 | # WIP: Implement restart logic here 51 | await update.message.reply_text("Restarting the bot...") 52 | else: 53 | await update.message.reply_text("You are not authorized to use this command.") 54 | 55 | # /resetdailytokens (admin command for resetting daily token usage) 56 | async def reset_daily_tokens_command(update: Update, context: CallbackContext, bot_instance): 57 | user_id = update.message.from_user.id 58 | if bot_instance.bot_owner_id == '0' or str(user_id) != bot_instance.bot_owner_id: 59 | logging.info(f"User {user_id} tried to use /resetdailytokens but was not authorized.") 60 | await update.message.reply_text("You are not authorized to use this command.") 61 | return 62 | 63 | try: 64 | 65 | # (old fallback method, JIC) 66 | # Reset the in-memory token usage counter 67 | # bot_instance.total_token_usage = 0 68 | # logging.info("In-memory token usage counter reset.") 69 | 70 | # Pass the reset_total_token_usage method as a callback to reset_token_usage_at_midnight 71 | reset_token_usage_at_midnight(bot_instance.token_usage_file, bot_instance.reset_total_token_usage) 72 | logging.info(f"User {user_id} has reset the daily token usage, including the in-memory token usage counter.") 73 | await update.message.reply_text("Daily token usage has been reset, including the in-memory token usage counter.") 74 | 75 | except Exception as e: 76 | logging.error(f"Failed to reset daily token usage: {e}") 77 | await update.message.reply_text("Failed to reset daily token usage.") 78 | 79 | # /resetsystemmessage (admin command) 80 | async def reset_system_message_command(update: Update, context: CallbackContext, bot_instance): 81 | user_id = update.message.from_user.id 82 | if bot_instance.bot_owner_id == '0' or str(user_id) != bot_instance.bot_owner_id: 83 | logging.info(f"User {user_id} tried to use /resetsystemmessage but was not authorized.") 84 | await update.message.reply_text("You are not authorized to use this command.") 85 | return 86 | 87 | old_system_message = bot_instance.system_instructions 88 | bot_instance.system_instructions = bot_instance.config.get('SystemInstructions', 'You are an OpenAI API-based chatbot on Telegram.') 89 | logging.info(f"User {user_id} reset the system message to default.") 90 | await update.message.reply_text(f"System message reset to default.\n\nOld Message:\n{old_system_message}\n----------------------\nNew Default Message:\n{bot_instance.system_instructions}", parse_mode=ParseMode.HTML) 91 | 92 | # /setsystemmessage (admin command) 93 | async def set_system_message_command(update: Update, context: CallbackContext, bot_instance): 94 | user_id = update.message.from_user.id 95 | if bot_instance.bot_owner_id == '0' or str(user_id) != bot_instance.bot_owner_id: 96 | logging.info(f"User {user_id} tried to use /setsystemmessage but was not authorized.") 97 | await update.message.reply_text("You are not authorized to use this command.") 98 | return 99 | 100 | new_system_message = ' '.join(context.args) 101 | if new_system_message: 102 | old_system_message = 
bot_instance.system_instructions 103 | bot_instance.system_instructions = new_system_message 104 | logging.info(f"User {user_id} updated the system message to: {new_system_message}") 105 | await update.message.reply_text(f"System message updated.\n\nOld Message: {old_system_message}\nNew Message: {new_system_message}", parse_mode=ParseMode.HTML) 106 | else: 107 | logging.info(f"User {user_id} attempted to set system message but provided no new message.") 108 | await update.message.reply_text("Please provide the new system message in the command line, i.e.: /setsystemmessage My new system message to the AI on what it is, where it is, etc.") 109 | 110 | 111 | # /usage (admin command) 112 | # bot_commands.py 113 | async def usage_command(update: Update, context: CallbackContext): 114 | bot_instance = context.bot_data.get('bot_instance') # Retrieve the bot instance from context 115 | 116 | if not bot_instance: 117 | await update.message.reply_text("Internal error: Bot instance not found.") 118 | logging.error("Bot instance not found in context.bot_data") 119 | return 120 | 121 | logging.info(f"User {update.message.from_user.id} invoked /usage command") 122 | 123 | if bot_instance.bot_owner_id == '0': 124 | await update.message.reply_text("The `/usage` command is disabled.") 125 | logging.info("Usage command is disabled until a bot owner is defined in `config.ini`.") 126 | return 127 | 128 | if str(update.message.from_user.id) != bot_instance.bot_owner_id: 129 | await update.message.reply_text("You don't have permission to use this command.") 130 | logging.info(f"User {update.message.from_user.id} does not have permission to use /usage") 131 | return 132 | 133 | # Correct path to token_usage.json inside logs/ directory 134 | # token_usage_file = os.path.join(bot_instance.data_directory, 'logs', 'token_usage.json') 135 | token_usage_file = os.path.join(bot_instance.logs_directory, 'token_usage.json') 136 | 137 | logging.info(f"Looking for token usage file at: {token_usage_file}") 138 | current_date = datetime.datetime.utcnow() 139 | 140 | try: 141 | if os.path.exists(token_usage_file): 142 | with open(token_usage_file, 'r') as file: 143 | token_usage_history = json.load(file) 144 | logging.info("Loaded token usage history successfully") 145 | 146 | # Prune token usage history 147 | cutoff_date = current_date - datetime.timedelta(days=bot_instance.max_history_days) 148 | token_usage_history = { 149 | date: usage for date, usage in token_usage_history.items() 150 | if datetime.datetime.strptime(date, '%Y-%m-%d') >= cutoff_date 151 | } 152 | logging.info("Pruned token usage history based on cutoff date") 153 | else: 154 | token_usage_history = {} 155 | logging.warning(f"Token usage file does not exist at: {token_usage_file}") 156 | except json.JSONDecodeError: 157 | await update.message.reply_text("Error reading token usage history.") 158 | logging.error("JSONDecodeError while reading token_usage.json") 159 | return 160 | except Exception as e: 161 | await update.message.reply_text(f"An unexpected error occurred: {e}") 162 | logging.error(f"Unexpected error in usage_command: {e}") 163 | return 164 | 165 | today_usage = token_usage_history.get(current_date.strftime('%Y-%m-%d'), 0) 166 | token_cap_info = ( 167 | f"Today's usage: {today_usage} tokens\n" 168 | f"Daily token cap: {'No cap' if bot_instance.max_tokens_config == 0 else f'{bot_instance.max_tokens_config} tokens'}\n\n" 169 | "Token Usage History:\n" 170 | ) 171 | 172 | for date, usage in sorted(token_usage_history.items()): 173 | 
token_cap_info += f"{date}: {usage} tokens\n" 174 | 175 | await update.message.reply_text(token_cap_info) 176 | logging.info("Sent usage information to user") 177 | 178 | # /usagechart (admin command) 179 | async def usage_chart_command(update: Update, context: CallbackContext): 180 | bot_instance = context.bot_data.get('bot_instance') # Retrieve the bot instance from context 181 | 182 | if not bot_instance: 183 | await update.message.reply_text("Internal error: Bot instance not found.") 184 | logging.error("Bot instance not found in context.bot_data") 185 | return 186 | 187 | logging.info(f"User {update.message.from_user.id} invoked /usagechart command") 188 | 189 | if bot_instance.bot_owner_id == '0': 190 | await update.message.reply_text("The `/usagechart` command is disabled.") 191 | logging.info("Usagechart command is disabled") 192 | return 193 | 194 | if str(update.message.from_user.id) != bot_instance.bot_owner_id: 195 | await update.message.reply_text("You don't have permission to use this command.") 196 | logging.info(f"User {update.message.from_user.id} does not have permission to use /usagechart") 197 | return 198 | 199 | # Define paths 200 | token_usage_file = os.path.join(bot_instance.logs_directory, 'token_usage.json') 201 | output_image_file = os.path.join(bot_instance.data_directory, 'token_usage_chart.png') 202 | 203 | logging.info(f"Looking for token usage file at: {token_usage_file}") 204 | logging.info(f"Output image file will be at: {output_image_file}") 205 | 206 | # Ensure the data directory exists 207 | try: 208 | if not os.path.exists(bot_instance.data_directory): 209 | os.makedirs(bot_instance.data_directory, exist_ok=True) 210 | bot_instance.logger.info(f"Created data directory at {bot_instance.data_directory}") 211 | except OSError as e: 212 | bot_instance.logger.error(f"Failed to create data directory {bot_instance.data_directory}: {e}") 213 | await update.message.reply_text(f"Failed to create the data directory for the chart. Please check the bot's permissions.") 214 | return 215 | 216 | # Generate the usage chart 217 | try: 218 | generate_usage_chart(token_usage_file, output_image_file) 219 | bot_instance.logger.info(f"Generated usage chart at {output_image_file}") 220 | except Exception as e: 221 | bot_instance.logger.error(f"Failed to generate usage chart: {e}") 222 | await update.message.reply_text("Failed to generate usage chart.") 223 | return 224 | 225 | # Try to open and send the generated chart image 226 | try: 227 | with open(output_image_file, 'rb') as file: 228 | await context.bot.send_photo(chat_id=update.message.chat_id, photo=file) 229 | bot_instance.logger.info(f"Sent usage chart to chat_id {update.message.chat_id}") 230 | except FileNotFoundError: 231 | await update.message.reply_text("Token usage chart not found. 
Please ensure it's being generated correctly.") 232 | bot_instance.logger.error("Token usage chart file not found: %s", output_image_file) 233 | except Exception as e: 234 | await update.message.reply_text("Failed to send the usage chart.") 235 | bot_instance.logger.error(f"Error sending usage chart: {e}") 236 | 237 | # /reset 238 | async def reset_command(update: Update, context: CallbackContext, bot_owner_id, reset_enabled, admin_only_reset): 239 | # Check if the /reset command is enabled 240 | if not reset_enabled: 241 | logging.info(f"User tried to use the /reset command, but it was disabled.") 242 | await update.message.reply_text("The /reset command is disabled.") 243 | return 244 | 245 | # Check if the command is admin-only and if the user is the admin 246 | if admin_only_reset and str(update.message.from_user.id) != bot_owner_id: 247 | logging.info(f"User tried to use the /reset command, but was not authorized to do so.") 248 | await update.message.reply_text("You are not authorized to use this command.") 249 | return 250 | 251 | # If the user is authorized, or if the command is not admin-only 252 | if 'chat_history' in context.chat_data: 253 | context.chat_data['chat_history'] = [] 254 | logging.info(f"Memory context was reset successfully with: /reset") 255 | await update.message.reply_text("Memory context reset successfully.") 256 | else: 257 | logging.info(f"No memory context to reset with: /reset") 258 | await update.message.reply_text("No memory context to reset.") 259 | 260 | # /viewconfig (admin command) 261 | async def view_config_command(update: Update, context: CallbackContext, bot_owner_id): 262 | user_id = update.message.from_user.id # Retrieve the user_id 263 | 264 | if bot_owner_id == '0': 265 | logging.info(f"User {user_id} attempted to view the config with: /viewconfig -- command disabled") 266 | await update.message.reply_text("The /viewconfig command is disabled.") 267 | return 268 | 269 | if str(user_id) == bot_owner_id: 270 | try: 271 | config_contents = "
"
272 |             with open(CONFIG_PATH, 'r') as file:
273 |                 for line in file:
274 |                     if not line.strip() or line.strip().startswith('#'):
275 |                         continue
276 |                     # Escape HTML special characters
277 |                     line = line.replace('&', '&').replace('<', '<').replace('>', '>')
278 |                     config_contents += line
279 |             config_contents += "
" 280 | logging.info(f"User {user_id} (owner) viewed the config with: /viewconfig") 281 | if config_contents: 282 | await update.message.reply_text(config_contents, parse_mode=ParseMode.HTML) 283 | else: 284 | logging.info(f"[WARNING] User {user_id} attempted to view the config with: /viewconfig -- no configuration settings were available") 285 | await update.message.reply_text("No configuration settings available.") 286 | except Exception as e: 287 | logging.info(f"[ERROR] User {user_id} attempted to view the config with: /viewconfig -- there was an error reading the config file: {e}") 288 | await update.message.reply_text(f"Error reading configuration file: {e}") 289 | else: 290 | logging.info(f"[ATTENTION] User {user_id} attempted to view the config with: /viewconfig -- access denied") 291 | await update.message.reply_text("You are not authorized to use this command.") 292 | 293 | # ~~~~~~~~~~~~~ 294 | # user commands 295 | # ~~~~~~~~~~~~~ 296 | 297 | # /start 298 | async def start(update: Update, context: CallbackContext, start_command_response): 299 | await update.message.reply_text(start_command_response) 300 | 301 | # /about 302 | async def about_command(update: Update, context: CallbackContext, version_number): 303 | about_text = f""" 304 | 🤖 TelegramBot-OpenAI-API ⚡️ Powered by ChatKeke 🚀 305 | This is an OpenAI-powered Telegram chatbot created by FlyingFathead. 306 | Version: v{version_number} 307 | For more information, visit: https://github.com/FlyingFathead/TelegramBot-OpenAI-API 308 | (The original author is NOT responsible for any chatbots created using the code) 309 | """ 310 | await update.message.reply_text(about_text) 311 | 312 | # /help 313 | async def help_command(update: Update, context: CallbackContext, reset_enabled, admin_only_reset): 314 | help_text = """ 315 | Welcome to this OpenAI API-powered chatbot! Here are some commands you can use: 316 | 317 | - /start: Start a conversation with the bot. 318 | - /help: Display this help message. 319 | - /about: Learn more about this bot. 320 | """ 321 | 322 | if reset_enabled: 323 | help_text += "- /reset: Reset the bot's context memory.\n" 324 | if admin_only_reset: 325 | help_text += " (Available to admin only)\n" 326 | 327 | help_text += "- /admin: (For bot owner only) Display admin commands.\n\nJust type your message to chat with the bot!" 328 | 329 | await update.message.reply_text(help_text) --------------------------------------------------------------------------------