├── .catgitinclude
├── requirements.txt
├── src
│   ├── extras
│   │   ├── README.md
│   │   ├── bot_token_test.py
│   │   └── api_fetch_news.py
│   ├── token_usage_visualization.py
│   ├── rss_feeds.py
│   ├── api_get_global_time.py
│   ├── rag_elasticsearch
│   │   ├── elasticsearch_backend_search.py
│   │   ├── backup_database.py
│   │   ├── elasticsearch_find_empty_question_fields.py
│   │   ├── elasticsearch_find_and_delete_entry.py
│   │   ├── elasticsearch_test_search.py
│   │   ├── review_and_fix_entries.py
│   │   └── qa_to_json.py
│   ├── api_get_time.py
│   ├── api_get_additional_weather_data.py
│   ├── timedate_handler.py
│   ├── configmerger.py
│   ├── perplexity_handler.py
│   ├── api_get_maptiler.py
│   ├── api_key.py
│   ├── calc_module.py
│   ├── api_get_website_dump.py
│   ├── utils.py
│   ├── reminder_poller.py
│   ├── api_get_openrouteservice.py
│   ├── config_paths.py
│   ├── api_get_stock_prices.py
│   ├── api_get_stock_prices_alphavantage.py
│   ├── url_handler.py
│   ├── voice_message_handler.py
│   ├── elasticsearch_handler.py
│   ├── api_get_nws_weather.py
│   ├── api_get_weatherapi.py
│   ├── bot_token.py
│   ├── reminder_handler.py
│   ├── api_perplexity_search.py
│   └── bot_commands.py
├── docker_deploy.sh
├── .gitignore
├── Dockerfile
├── .github
│   └── workflows
│       └── build-and-push.yml
├── docker_setup.sh
└── config
    └── config.ini

/.catgitinclude:
--------------------------------------------------------------------------------
1 | # to include in `catgit` (see https://github.com/FlyingFathead/catgit for more)
2 | src/db_utils.py
3 | src/bot_token.py
4 | src/config_paths.py
5 | src/custom_functions.py
6 | src/main.py
7 | src/modules.py
8 | src/text_message_handler.py
9 | src/utils.py
10 | config/config.ini
11 | src/reminder_poller.py
12 | src/reminder_handler.py
13 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | apscheduler>=3.11.0
2 | beautifulsoup4>=4.12.3
3 | configparser>=6.0.0
4 | elastic-transport>=8.15.0
5 | elasticsearch>=8.15.1
6 | ffmpeg-python>=0.2.0
7 | httpx>=0.25.2
8 | langdetect>=1.0.9
9 | matplotlib>=3.8.2
10 | holidays>=0.49
11 | lxml>=5.2.2
12 | nltk>=3.8.1
13 | openai>=1.6.1
14 | pydub>=0.25.1
15 | python-telegram-bot>=20.7
16 | transformers>=4.36.2
17 | requests>=2.31.0
18 | pytz>=2024.1
19 | timezonefinder>=6.4.0
20 | yfinance>=0.2.41
21 | yt-dlp>=2024.3.10
22 | feedparser>=6.0.11
23 | tiktoken>=0.7.0
--------------------------------------------------------------------------------
/src/extras/README.md:
--------------------------------------------------------------------------------
1 | # Extras
2 | 
3 | This directory contains experimental or unimplemented modules. The scripts here are works in progress and may not be fully functional yet. They are included for future development and testing purposes.
4 | 
5 | ## Contents
6 | 
7 | - **`api_fetch_news.py`**
8 |   A work-in-progress script intended to fetch news articles from various sources via APIs. Not yet fully implemented or integrated.
9 | 
10 | ## Notes
11 | 
12 | - These modules are not part of the core functionality of the bot and may change significantly as development continues.
13 | - Feel free to experiment with these modules, but please be aware that they may contain bugs or incomplete features.
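## Quick check

As a quick sanity check (a suggested workflow, assuming you export the token in the shell you run the bot from), `bot_token_test.py` below verifies that `TELEGRAM_BOT_TOKEN` is visible to the process and exits non-zero if it is not:

```bash
export TELEGRAM_BOT_TOKEN="123456:ABC..."   # placeholder, use your own token
python src/extras/bot_token_test.py
```

Only the first four characters of the token are logged, so the output is safe to paste into bug reports.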
14 | 15 | -------------------------------------------------------------------------------- /src/extras/bot_token_test.py: -------------------------------------------------------------------------------- 1 | # test to see if your TG bot token is available in the environment 2 | 3 | import os 4 | import logging 5 | 6 | # Set up basic logging 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | def get_bot_token(): 10 | bot_token = os.getenv('TELEGRAM_BOT_TOKEN') 11 | if not bot_token: 12 | logging.error("Failed to retrieve TELEGRAM_BOT_TOKEN from environment.") 13 | return None 14 | return bot_token 15 | 16 | if __name__ == "__main__": 17 | token = get_bot_token() 18 | if token: 19 | logging.info(f"Successfully retrieved bot token: {token[:4]}... (masked for security)") 20 | else: 21 | logging.critical("No bot token found. Exiting.") 22 | exit(1) 23 | -------------------------------------------------------------------------------- /src/token_usage_visualization.py: -------------------------------------------------------------------------------- 1 | # token_usage_visualization.py 2 | 3 | import matplotlib.pyplot as plt 4 | import json 5 | 6 | def generate_usage_chart(token_usage_file, output_image_file): 7 | try: 8 | with open(token_usage_file, 'r') as file: 9 | data = json.load(file) 10 | 11 | dates = list(data.keys()) 12 | usage = list(data.values()) 13 | 14 | plt.figure(figsize=(10, 6)) 15 | plt.bar(dates, usage, color='blue') 16 | plt.xlabel('Date') 17 | plt.ylabel('Token Usage') 18 | plt.xticks(rotation=45) 19 | plt.title('Daily Token Usage') 20 | plt.tight_layout() 21 | plt.savefig(output_image_file) 22 | 23 | except Exception as e: 24 | print(f"Error generating usage chart: {e}") 25 | return None 26 | -------------------------------------------------------------------------------- /src/rss_feeds.py: -------------------------------------------------------------------------------- 1 | # rss_feeds.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | import feedparser 7 | from utils import sanitize_html, split_message 8 | 9 | RSS_FEED_URLS = { 10 | 'is_tuoreimmat': 'https://www.is.fi/rss/tuoreimmat.xml', 11 | 'your_custom_rss': 'http://example.com/rss' 12 | # Add more RSS feed URLs as needed 13 | } 14 | 15 | async def fetch_rss_feed(feed_key): 16 | """Fetch and format the RSS feed based on the feed key.""" 17 | feed_url = RSS_FEED_URLS.get(feed_key) 18 | if not feed_url: 19 | return f"Unknown RSS feed key: {feed_key}" 20 | 21 | feed = feedparser.parse(feed_url) 22 | formatted_entries = "\n".join([f"{entry.title}: {entry.link}" for entry in feed.entries[:5]]) 23 | return formatted_entries 24 | -------------------------------------------------------------------------------- /src/extras/api_fetch_news.py: -------------------------------------------------------------------------------- 1 | # api_fetch_news.py 2 | 3 | import httpx 4 | 5 | async def fetch_news(api_key: str, query: str): 6 | url = "https://newsapi.org/v2/everything" 7 | params = { 8 | "q": query, 9 | "apiKey": api_key, 10 | "language": "en", 11 | } 12 | async with httpx.AsyncClient() as client: 13 | response = await client.get(url, params=params) 14 | if response.status_code == 200: 15 | news_data = response.json() 16 | articles = news_data.get("articles", []) 17 | messages = [] 18 | for article in articles[:5]: # Limit to the first 5 articles 19 | title = article["title"] 20 | url = article["url"] 21 | 
messages.append(f"{title}\nRead more: {url}") 22 | return "\n\n".join(messages) 23 | else: 24 | return "Failed to fetch news." 25 | -------------------------------------------------------------------------------- /docker_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONTAINER_NAME="telegrambot-openai-api" 4 | IMAGE_NAME="telegrambot-openai-api" 5 | 6 | # Function to stop and remove existing container 7 | cleanup() { 8 | echo "Stopping container if it's running..." 9 | sudo docker stop ${CONTAINER_NAME} || true 10 | 11 | echo "Removing container if it exists..." 12 | sudo docker rm ${CONTAINER_NAME} || true 13 | } 14 | 15 | # Function to build and run the container 16 | deploy() { 17 | echo "Building Docker image..." 18 | sudo docker build --no-cache -t ${IMAGE_NAME} . 19 | if [[ $? -ne 0 ]]; then 20 | echo "Error: Docker image build failed." 21 | exit 1 22 | fi 23 | 24 | echo "Running Docker container..." 25 | sudo docker run --env-file .env --name ${CONTAINER_NAME} -d ${IMAGE_NAME} 26 | if [[ $? -ne 0 ]]; then 27 | echo "Error: Failed to run the Docker container." 28 | exit 1 29 | fi 30 | 31 | echo "Deployment complete." 32 | } 33 | 34 | # Execute the functions 35 | cleanup 36 | deploy 37 | -------------------------------------------------------------------------------- /src/api_get_global_time.py: -------------------------------------------------------------------------------- 1 | # api_get_global_time.py 2 | 3 | import subprocess 4 | 5 | TIMEZONES = [ 6 | "UTC", "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", 7 | "Europe/London", "Europe/Paris", "Europe/Berlin", "Europe/Helsinki", "Asia/Tokyo", 8 | "Asia/Shanghai", "Australia/Sydney", "Asia/Kolkata", "America/Sao_Paulo" 9 | ] 10 | 11 | def get_time_for_timezone(timezone): 12 | try: 13 | command = f"TZ={timezone} date +'%Y-%m-%d %H:%M:%S %Z'" 14 | result = subprocess.run(command, shell=True, capture_output=True, text=True) 15 | 16 | if result.returncode != 0: 17 | return f"Failed to fetch time for timezone {timezone}: {result.stderr.strip()}" 18 | 19 | return result.stdout.strip() 20 | except Exception as e: 21 | return f"Error executing date command for timezone {timezone}: {str(e)}" 22 | 23 | async def get_global_time(): 24 | times = {} 25 | for timezone in TIMEZONES: 26 | times[timezone] = get_time_for_timezone(timezone) 27 | return times -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore the cookies.txt file in config 2 | cookies.txt 3 | config/cookies.txt 4 | 5 | # Ignore all token files 6 | # i.e. files named bot_token.txt and api_token.txt, etc. 
7 | **/api_token.txt 8 | **/bot_token.txt 9 | config/*.txt 10 | config/bot_token.txt 11 | config/api_token.txt 12 | api_token.txt 13 | bot_token.txt 14 | chat_history.txt 15 | **/chat_history.txt 16 | 17 | # Bot-specific data 18 | token_usage.json 19 | **/data/ 20 | data/ 21 | 22 | # Log files 23 | logs/ 24 | **/logs/ 25 | chat.log 26 | bot.log 27 | *.log 28 | 29 | # Python cache files 30 | **/__pycache__/ 31 | __pycache__/ 32 | *.pyc 33 | 34 | # audio messages directory 35 | audio_messages/ 36 | **/audio_messages/ 37 | 38 | # yt-dlp cache directory 39 | .cache/ 40 | **/.cache/ 41 | 42 | # (alt 43 | # transcriptions directory 44 | transcriptions/ 45 | # audio files directory 46 | audio/ 47 | 48 | # ignore potential transcribed files 49 | *.ogg 50 | *.mp3 51 | *.wav 52 | *.aiff 53 | *.aac 54 | *.mp4 55 | *.vtt 56 | *.srt 57 | *.part 58 | 59 | # Ignore virtual environments 60 | venv/ 61 | env/ 62 | .env -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | 3 | # Install dependencies & Rust 4 | RUN apt-get update && apt-get install -y \ 5 | ffmpeg \ 6 | lynx \ 7 | gcc \ 8 | git \ 9 | curl \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Install Rust using rustup 14 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 15 | 16 | # Add Rust to PATH 17 | ENV PATH="/root/.cargo/bin:${PATH}" 18 | 19 | WORKDIR /app 20 | 21 | # Copy the requirements file first to leverage Docker cache 22 | COPY requirements.txt . 23 | 24 | # Install Python dependencies 25 | RUN pip3 install --no-cache-dir -r requirements.txt 26 | 27 | # Remove build dependencies to reduce image size 28 | RUN apt-get update && apt-get remove -y curl gcc git && apt-get autoremove -y && \ 29 | rm -rf /root/.cargo /root/.rustup /var/lib/apt/lists/* 30 | 31 | # Copy the entire project into the container 32 | COPY . . 
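# Note: `COPY . .` pulls in the entire build context, so any source change
# invalidates this layer; requirements.txt was copied separately above precisely
# so the dependency-install layer stays cached. Adding a .dockerignore for
# logs/, data/, venv/ and .env (none is shown in this dump, so this is a
# suggestion rather than existing repo policy) would keep those out of the image.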
33 | 34 | # Set environment variables for Docker runtime 35 | ENV PYTHONUNBUFFERED=1 36 | ENV RUNNING_IN_DOCKER=true 37 | 38 | # Optional: Debugging tools (disable in production) 39 | # RUN ls -lsa 40 | # RUN pwd 41 | 42 | # Default command to run the application 43 | CMD ["python3", "src/main.py"] 44 | -------------------------------------------------------------------------------- /src/rag_elasticsearch/elasticsearch_backend_search.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_backend_search.py 2 | 3 | from elasticsearch import Elasticsearch 4 | 5 | # Function to search Elasticsearch 6 | def search_es(es, index, field, search_term): 7 | query = { 8 | "query": { 9 | "wildcard": { 10 | field: f"*{search_term}*" 11 | } 12 | }, 13 | "size": 5 14 | } 15 | response = es.search(index=index, body=query) 16 | return response 17 | 18 | # Connect to Elasticsearch 19 | es = Elasticsearch(["http://localhost:9200"]) 20 | 21 | # Check the connection 22 | if es.ping(): 23 | print("Connected to Elasticsearch!") 24 | else: 25 | print("Could not connect to Elasticsearch.") 26 | exit(1) 27 | 28 | # Ask user for search term 29 | search_term = input("Enter search term: ") 30 | 31 | # Define the index and field to search on 32 | index = "tg-bot-rag-index" # Replace with your index name 33 | field = "content" # Replace with the field you want to search 34 | 35 | # Perform the search 36 | result = search_es(es, index, field, search_term) 37 | 38 | # Print the search results in a Discord-friendly format 39 | print("Search Results:") 40 | for hit in result['hits']['hits']: 41 | print(f"Document ID: {hit['_id']}\nSnippet: {hit['_source'][field][:200]}...") # Print the first 200 characters 42 | print("---") -------------------------------------------------------------------------------- /src/rag_elasticsearch/backup_database.py: -------------------------------------------------------------------------------- 1 | # backup_database.py 2 | # backup database into a json dump (recommended!) 
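# Usage: run directly, e.g. `python backup_database.py` (no arguments; assumes
# Elasticsearch is reachable at http://localhost:9200, as configured below).
# Output goes to a timestamped file named current_backup_YYYYMMDD_HHMMSS.json.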
3 | 
4 | import json
5 | from elasticsearch import Elasticsearch, helpers
6 | from datetime import datetime
7 | 
8 | # Connect to Elasticsearch
9 | es = Elasticsearch(["http://localhost:9200"])
10 | index_name = 'tg-bot-rag-index'
11 | 
12 | def backup_current_state(es, index_name):
13 |     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
14 |     backup_file = f'current_backup_{timestamp}.json'
15 | 
16 |     query = {
17 |         "query": {
18 |             "match_all": {}
19 |         },
20 |         "size": 10000
21 |     }
22 | 
23 |     response = es.search(index=index_name, body=query, scroll='2m')
24 |     scroll_id = response['_scroll_id']
25 |     hits = response['hits']['hits']
26 | 
27 |     all_hits = []
28 |     all_hits.extend(hits)
29 | 
30 |     while len(hits) > 0:
31 |         response = es.scroll(scroll_id=scroll_id, scroll='2m')
32 |         scroll_id = response['_scroll_id']
33 |         hits = response['hits']['hits']
34 |         all_hits.extend(hits)
35 | 
36 |     # Collect all documents into a list
37 |     all_documents = [hit["_source"] for hit in all_hits]
38 | 
39 |     # Write the list of documents to the backup file
40 |     with open(backup_file, 'w', encoding='utf-8', errors='replace') as f:
41 |         json.dump(all_documents, f, ensure_ascii=False, indent=4)
42 | 
43 |     print(f"Backup completed to {backup_file}")
44 | 
45 | backup_current_state(es, index_name)
46 | 
--------------------------------------------------------------------------------
/src/rag_elasticsearch/elasticsearch_find_empty_question_fields.py:
--------------------------------------------------------------------------------
1 | # elasticsearch_find_empty_question_fields.py
2 | 
3 | from elasticsearch import Elasticsearch
4 | 
5 | def find_empty_questions(index_name):
6 |     es = Elasticsearch(["http://localhost:9200"])  # Adjust the connection details as necessary
7 | 
8 |     query = {
9 |         "query": {
10 |             "bool": {
11 |                 "should": [
12 |                     {"bool": {"must_not": {"exists": {"field": "question"}}}},
13 |                     {"term": {"question.keyword": ""}},
14 |                     {"script_score": {
15 |                         "query": {"match_all": {}},
16 |                         "script": {
17 |                             "source": "if (doc['question'].size() == 0) return 1; return doc['question'].value == null || doc['question'].value.isEmpty() ?
1 : 0;", 18 | "lang": "painless" 19 | } 20 | }} 21 | ], 22 | "minimum_should_match": 1 23 | } 24 | } 25 | } 26 | 27 | response = es.search(index=index_name, body=query) 28 | print(f"Found {response['hits']['total']['value']} documents with empty or missing 'question' fields.") 29 | 30 | # Example handling: Print out the document IDs 31 | for doc in response['hits']['hits']: 32 | print(f"Document ID: {doc['_id']}") 33 | 34 | if __name__ == "__main__": 35 | index_name = "tg-bot-rag-index" # Replace with your index 36 | # index_name = "your_index_name" # Replace with your actual index name 37 | find_empty_questions(index_name) -------------------------------------------------------------------------------- /.github/workflows/build-and-push.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push to GHCR 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | REGISTRY: ghcr.io 11 | IMAGE_NAME: flyingfathead/telegrambot-openai-api 12 | 13 | jobs: 14 | build-and-push: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: read 18 | packages: write 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | 24 | - name: Log in to the Container registry 25 | uses: docker/login-action@v3 26 | with: 27 | registry: ${{ env.REGISTRY }} 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Set up Docker Buildx 32 | uses: docker/setup-buildx-action@v3 33 | 34 | - name: Build and push Docker image 35 | uses: docker/build-push-action@v5 36 | with: 37 | context: . 38 | push: true 39 | tags: | 40 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest 41 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} 42 | cache-from: type=gha 43 | cache-to: type=gha,mode=max 44 | 45 | - name: Stop and remove any existing container 46 | run: | 47 | docker stop telegrambot-openai-api || true 48 | docker rm telegrambot-openai-api || true 49 | 50 | - name: Run Docker container with environment variables 51 | run: | 52 | docker run \ 53 | --name telegrambot-openai-api \ 54 | --env OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} \ 55 | --env TELEGRAM_BOT_TOKEN=${{ secrets.TELEGRAM_BOT_TOKEN }} \ 56 | -d ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest 57 | -------------------------------------------------------------------------------- /src/rag_elasticsearch/elasticsearch_find_and_delete_entry.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_find_and_delete_entry.py 2 | 3 | # find and delete mistaken entries from the elasticsearch database 4 | 5 | from elasticsearch import Elasticsearch 6 | 7 | def search_qa_pairs(es, index_name, search_term): 8 | query = { 9 | "query": { 10 | "multi_match": { 11 | "query": search_term, 12 | "fields": ["question", "answer"] 13 | } 14 | } 15 | } 16 | response = es.search(index=index_name, body=query) 17 | return response['hits']['hits'] 18 | 19 | def delete_document(es, index_name, doc_id): 20 | response = es.delete(index=index_name, id=doc_id) 21 | return response 22 | 23 | def main(): 24 | es = Elasticsearch(["http://localhost:9200"]) 25 | index_name = "tg-bot-rag-index" # Adjust the index name as needed 26 | 27 | search_term = input("Enter a search term to find Q&A pairs: ") 28 | hits = search_qa_pairs(es, index_name, search_term) 29 | 30 | if hits: 31 | print("Found Q&A pairs:") 32 | for hit in hits: 33 | print(f"ID: {hit['_id']}, Question: 
{hit['_source']['question']}, Answer: {hit['_source']['answer']}") 34 | 35 | delete_id = input("Enter the ID of the document to delete (leave blank to cancel): ").strip() 36 | if delete_id: 37 | confirm = input(f"Are you sure you want to delete the document with ID {delete_id}? (y/n): ").strip().lower() 38 | if confirm == 'y': 39 | response = delete_document(es, index_name, delete_id) 40 | print(f"Document with ID {delete_id} has been deleted. Response: {response}") 41 | else: 42 | print("Deletion cancelled.") 43 | else: 44 | print("No Q&A pairs found with the given search term.") 45 | 46 | if __name__ == "__main__": 47 | main() -------------------------------------------------------------------------------- /src/api_get_time.py: -------------------------------------------------------------------------------- 1 | # api_get_time.py 2 | # for fetching time according to coordinates; placeholder/WIP 3 | 4 | from timezonefinder import TimezoneFinder 5 | from datetime import datetime 6 | import pytz 7 | import httpx # For making requests to a geocoding API 8 | 9 | def get_coordinates_for_location(location_name: str) -> tuple: 10 | 11 | # Fetches the latitude and longitude for a given location name. 12 | # This function uses a geocoding API to convert location names to coordinates. 13 | # Replace 'Your_API_Key_Here' with your actual API key for the geocoding service. 14 | 15 | api_url = f"https://api.opencagedata.com/geocode/v1/json?q={location_name}&key=Your_API_Key_Here" 16 | try: 17 | response = httpx.get(api_url) 18 | data = response.json() 19 | # Extracting the first result as an example. You might want to refine this for accuracy. 20 | coordinates = data['results'][0]['geometry'] 21 | return coordinates['lat'], coordinates['lng'] 22 | except Exception as e: 23 | print(f"Error fetching coordinates for location '{location_name}': {e}") 24 | return None, None 25 | 26 | # Determines the local time for a given location name. 27 | def get_local_time_for_location(location_name: str) -> str: 28 | 29 | lat, lng = get_coordinates_for_location(location_name) 30 | if lat is None or lng is None: 31 | return "Could not determine the coordinates for the location." 32 | 33 | # Find the time zone for the given coordinates 34 | tf = TimezoneFinder() 35 | timezone_str = tf.timezone_at(lat=lat, lng=lng) 36 | if timezone_str is None: 37 | return "Could not determine the time zone for the location." 
38 | 39 | # Get the current time in the determined time zone 40 | timezone = pytz.timezone(timezone_str) 41 | local_time = datetime.now(timezone) 42 | return local_time.strftime('%Y-%m-%d %H:%M:%S %Z%z') -------------------------------------------------------------------------------- /src/rag_elasticsearch/elasticsearch_test_search.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_test_search.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | from elasticsearch import Elasticsearch 7 | 8 | # Function to search Elasticsearch 9 | def search_es(es, index, field, search_term): 10 | query = { 11 | "query": { 12 | "bool": { 13 | "should": [ 14 | {"match_phrase": {field: {"query": search_term, "slop": 50}}}, 15 | {"match": {field: {"query": search_term, "operator": "or"}}} 16 | ] 17 | } 18 | }, 19 | "highlight": { 20 | "fields": { 21 | field: { 22 | "fragment_size": 200, 23 | "number_of_fragments": 5, 24 | "max_analyzed_offset": 1000000 # Adjust this value as needed 25 | } 26 | }, 27 | "pre_tags": ["["], 28 | "post_tags": ["]"] 29 | } 30 | } 31 | response = es.search(index=index, body=query, size=10) 32 | return response 33 | 34 | # Connect to Elasticsearch 35 | es = Elasticsearch(["http://localhost:9200"]) 36 | 37 | # Check the connection 38 | if es.ping(): 39 | print("Connected to Elasticsearch!") 40 | else: 41 | print("Could not connect to Elasticsearch.") 42 | exit(1) 43 | 44 | # Ask user for search term 45 | search_term = input("Enter search term: ") 46 | 47 | # Define the index and field to search on 48 | index = "tg-bot-rag-index" # Replace with your index 49 | field = "content" # Replace with the field you want to search 50 | 51 | # Perform the search 52 | result = search_es(es, index, field, search_term) 53 | 54 | # Print the search results 55 | print("Search Results:") 56 | for hit in result['hits']['hits']: 57 | # print("Document ID:", hit["_id"]) 58 | # print("Score:", hit["_score"]) # Optional: Display the relevance score 59 | if "highlight" in hit: 60 | print("Highlighted Snippets:") 61 | for highlight in hit["highlight"][field]: 62 | print(highlight) 63 | print("---\n") -------------------------------------------------------------------------------- /src/api_get_additional_weather_data.py: -------------------------------------------------------------------------------- 1 | # api_get_additional_weather_data.py 2 | 3 | import logging 4 | import sys 5 | import os 6 | import re 7 | import subprocess 8 | import asyncio 9 | 10 | ## NOTE: this is ONLY for example purposes! 11 | async def get_additional_data_dump(): 12 | try: 13 | # Execute the lynx command and capture the output 14 | command = 'lynx --dump -nolist https://www.foreca.fi/' 15 | process = await asyncio.create_subprocess_shell( 16 | command, 17 | stdout=asyncio.subprocess.PIPE, 18 | stderr=asyncio.subprocess.PIPE 19 | ) 20 | 21 | stdout, stderr = await process.communicate() 22 | 23 | if stderr: 24 | logging.error(f"Error in get_additional_data_dump: {stderr.decode()}") 25 | return "Error fetching data." 
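        # NOTE: lynx can emit warnings on stderr even when the dump itself
        # succeeds, so treating any stderr output as fatal is quite conservative.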
26 | 
27 |         output = stdout.decode()
28 | 
29 |         # Regular expressions to trim the output
30 |         start_marker = r'Suomen sää juuri nyt'
31 |         end_marker = r'Foreca YouTubessa'
32 |         trimmed_output = re.search(rf'{start_marker}(.*?){end_marker}', output, re.DOTALL)
33 | 
34 |         # Return the trimmed output if markers are found
35 |         if trimmed_output:
36 |             debug_output = trimmed_output.group(1)
37 | 
38 |             # Parsing the specific weather forecast section
39 |             parsed_forecast = parse_foreca_data(debug_output)
40 | 
41 |             # Format the parsed data for output
42 |             formatted_forecast = f"{parsed_forecast}"
43 | 
44 |             # Print the output for debugging
45 |             logging.info(formatted_forecast)
46 | 
47 |             return formatted_forecast
48 |         else:
49 |             return "Start or stop marker not found in the output."
50 | 
51 |     except Exception as e:
52 |         # Handle errors (e.g., lynx not installed, network issues)
53 |         logging.error(f"Exception in get_additional_data_dump: {e}")
54 |         return str(e)
55 | 
56 | def parse_foreca_data(data):
57 |     # Regular expressions to identify the start and end of the desired section
58 |     start_marker = r'Sääennuste koko maahan'
59 |     end_marker = r'Lähipäivien sää'
60 | 
61 |     # Extract the section
62 |     forecast_section = re.search(rf'{start_marker}(.*?){end_marker}', data, re.DOTALL)
63 |     if forecast_section:
64 |         forecast_data = forecast_section.group(1).strip()
65 |         # Further parsing can be done here to extract regional forecasts
66 |         # Format the data for output
67 |         return forecast_data
68 |     else:
69 |         return "Relevant weather forecast section not found."
70 | 
71 | # Example usage
72 | if __name__ == "__main__":
73 |     # Run the async function inside asyncio.run()
74 |     result = asyncio.run(get_additional_data_dump())
75 | 
76 |     # Print the result
77 |     print(result)
78 | 
--------------------------------------------------------------------------------
/src/timedate_handler.py:
--------------------------------------------------------------------------------
1 | # timedate_handler.py
2 | import datetime
3 | import pytz
4 | 
5 | # Maps English day names from strftime() -> Finnish
6 | fi_days = {
7 |     "Monday": "maanantai",
8 |     "Tuesday": "tiistai",
9 |     "Wednesday": "keskiviikko",
10 |     "Thursday": "torstai",
11 |     "Friday": "perjantai",
12 |     "Saturday": "lauantai",
13 |     "Sunday": "sunnuntai"
14 | }
15 | 
16 | # Maps English month names -> Finnish “month in the partitive case” for typical date usage
17 | fi_months = {
18 |     "January": "tammikuuta",
19 |     "February": "helmikuuta",
20 |     "March": "maaliskuuta",
21 |     "April": "huhtikuuta",
22 |     "May": "toukokuuta",
23 |     "June": "kesäkuuta",
24 |     "July": "heinäkuuta",
25 |     "August": "elokuuta",
26 |     "September": "syyskuuta",
27 |     "October": "lokakuuta",
28 |     "November": "marraskuuta",
29 |     "December": "joulukuuta"
30 | }
31 | 
32 | def get_ordinal_suffix(day_num: int) -> str:
33 |     """
34 |     Returns the English ordinal suffix for a given day of the month, e.g.
35 |     1 -> "st", 2 -> "nd", 3 -> "rd", 4 -> "th" (so 1 renders as "1st", 2 as "2nd", etc.).
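    11, 12 and 13 all take "th"; that special case is checked first below.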
36 | """ 37 | if 11 <= (day_num % 100) <= 13: 38 | return "th" 39 | elif day_num % 10 == 1: 40 | return "st" 41 | elif day_num % 10 == 2: 42 | return "nd" 43 | elif day_num % 10 == 3: 44 | return "rd" 45 | else: 46 | return "th" 47 | 48 | def get_english_timestamp_str(now_utc: datetime.datetime) -> str: 49 | """ 50 | Returns an English-formatted date/time string, e.g.: 51 | 'Monday, April 9th, 2025 | Time (UTC): 12:34:56' 52 | """ 53 | day_of_week_eng = now_utc.strftime("%A") # e.g. "Monday" 54 | month_name_eng = now_utc.strftime("%B") # e.g. "April" 55 | day_num = int(now_utc.strftime("%d")) 56 | year_str = now_utc.strftime("%Y") 57 | suffix = get_ordinal_suffix(day_num) 58 | date_str = f"{month_name_eng} {day_num}{suffix}, {year_str}" 59 | time_str = now_utc.strftime("%H:%M:%S") # "12:34:56" 60 | 61 | return f"{day_of_week_eng}, {date_str} | Time (UTC): {time_str}" 62 | 63 | def get_finnish_timestamp_str(now_utc: datetime.datetime) -> str: 64 | """ 65 | Returns a Finnish-formatted date/time string. For example: 66 | 'maanantai, 9. huhtikuuta 2025, klo 15:34:56 Suomen aikaa' 67 | 68 | (Adjust as you like for Finnish grammar.) 69 | """ 70 | helsinki_tz = pytz.timezone("Europe/Helsinki") 71 | now_fin = now_utc.astimezone(helsinki_tz) 72 | 73 | weekday_eng = now_fin.strftime("%A") # e.g. "Monday" 74 | day_of_week_fi = fi_days.get(weekday_eng, weekday_eng) 75 | 76 | month_eng = now_fin.strftime("%B") # e.g. "April" 77 | month_fi = fi_months.get(month_eng, month_eng) 78 | 79 | day_num = int(now_fin.strftime("%d")) # e.g. 9 80 | year_str = now_fin.strftime("%Y") # e.g. "2025" 81 | 82 | # For Finnish style we might do e.g. "9. huhtikuuta 2025" 83 | date_str_fi = f"{day_num}. {month_fi} {year_str}" 84 | 85 | time_str_fi = now_fin.strftime("%H:%M:%S") # "15:34:56" 86 | # For instance: "maanantai, 9. 
huhtikuuta 2025, klo 15:34:56 Suomen aikaa"
87 |     return f"{day_of_week_fi}, {date_str_fi}, klo {time_str_fi} Suomen aikaa"
88 | 
--------------------------------------------------------------------------------
/src/configmerger.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import re
3 | from config_paths import CONFIG_PATH
4 | 
5 | def update_config(main_config_file, custom_config_file):
6 |     # Read the custom configuration into a dictionary
7 |     custom_config = {}
8 |     with open(custom_config_file, 'r') as file:
9 |         for line in file:
10 |             if "=" in line and not line.startswith("#"):
11 |                 key, value = line.split('=', 1)
12 |                 custom_config[key.strip()] = value.strip()
13 | 
14 |     # Update the main configuration file
15 |     updated_lines = []
16 |     updated_keys = []
17 |     with open(main_config_file, 'r') as file:
18 |         for line in file:
19 |             if "=" in line and not line.startswith("#"):
20 |                 key = line.split('=', 1)[0].strip()
21 |                 if key in custom_config:
22 |                     line = f"{key} = {custom_config[key]}\n"
23 |                     updated_keys.append(key)
24 |             updated_lines.append(line)
25 | 
26 |     # Write the updated lines back to the main config file
27 |     with open(main_config_file, 'w') as file:
28 |         file.writelines(updated_lines)
29 | 
30 |     # Inform user about the updated keys
31 |     if updated_keys:
32 |         print("The following parameters have been updated:")
33 |         for key in updated_keys:
34 |             print(f"- {key}")
35 |     else:
36 |         print("No parameters were updated.")
37 | 
38 | if __name__ == "__main__":
39 |     if len(sys.argv) != 2:
40 |         print("Usage: configmerger.py <custom_config_file>")
41 |         sys.exit(1)
42 | 
43 |     main_config_file = CONFIG_PATH
44 |     custom_config_file = sys.argv[1]
45 | 
46 |     update_config(main_config_file, custom_config_file)
47 |     print(f"Configuration from {custom_config_file} has been merged into {main_config_file}.")
48 | 
49 | # ---
50 | # # // (old method)
51 | # import sys
52 | # import re
53 | 
54 | # def update_config(main_config_file, custom_config_file):
55 | #     # Read the custom configuration into a dictionary
56 | #     custom_config = {}
57 | #     with open(custom_config_file, 'r') as file:
58 | #         for line in file:
59 | #             if "=" in line and not line.startswith("#"):
60 | #                 key, value = line.split('=', 1)
61 | #                 custom_config[key.strip()] = value.strip()
62 | 
63 | #     # Update the main configuration file
64 | #     updated_lines = []
65 | #     with open(main_config_file, 'r') as file:
66 | #         for line in file:
67 | #             if "=" in line and not line.startswith("#"):
68 | #                 key = line.split('=', 1)[0].strip()
69 | #                 if key in custom_config:
70 | #                     line = f"{key} = {custom_config[key]}\n"
71 | #             updated_lines.append(line)
72 | 
73 | #     # Write the updated lines back to the main config file
74 | #     with open(main_config_file, 'w') as file:
75 | #         file.writelines(updated_lines)
76 | 
77 | # if __name__ == "__main__":
78 | #     if len(sys.argv) != 3:
79 | #         print("Usage: configmerger.py <main_config_file> <custom_config_file>")
80 | #         sys.exit(1)
81 | 
82 | #     main_config_file = sys.argv[1]
83 | #     custom_config_file = sys.argv[2]
84 | 
85 | #     update_config(main_config_file, custom_config_file)
86 | #     print(f"Configuration from {custom_config_file} has been merged into {main_config_file}.")
--------------------------------------------------------------------------------
/src/rag_elasticsearch/review_and_fix_entries.py:
--------------------------------------------------------------------------------
1 | # review_and_fix_entries.py
2 | 
3 | import json
4 | from elasticsearch import Elasticsearch, helpers
5 | 
6 | # Configuration
7 | es =
Elasticsearch(["http://localhost:9200"]) 8 | index_name = 'tg-bot-rag-index' 9 | problematic_answer = "<[get_defcon_status]>" 10 | backup_file = 'backup_before_correction.json' 11 | 12 | def fetch_problematic_entries(es, index_name, problematic_answer): 13 | query = { 14 | "query": { 15 | "match": { 16 | "answer": problematic_answer 17 | } 18 | }, 19 | "size": 10000 20 | } 21 | 22 | response = es.search(index=index_name, body=query, scroll='2m') 23 | scroll_id = response['_scroll_id'] 24 | hits = response['hits']['hits'] 25 | 26 | all_hits = [] 27 | all_hits.extend(hits) 28 | 29 | while len(hits) > 0: 30 | response = es.scroll(scroll_id=scroll_id, scroll='2m') 31 | scroll_id = response['_scroll_id'] 32 | hits = response['hits']['hits'] 33 | all_hits.extend(hits) 34 | 35 | return all_hits 36 | 37 | def save_backup(entries, backup_file): 38 | with open(backup_file, 'w', encoding='utf-8', errors='replace') as f: 39 | json.dump(entries, f, ensure_ascii=False, indent=4) 40 | print(f"Backup completed to {backup_file}") 41 | 42 | def review_and_fix_entries(entries): 43 | corrected_entries = [] 44 | index = 0 45 | 46 | while index < len(entries): 47 | entry = entries[index] 48 | print(f"\nQuestion: {entry['_source']['question']}") 49 | print(f"Answer: {entry['_source']['answer']}") 50 | action = input("Enter action (n = next, p = previous, s = skip, e = edit, d = delete): ").strip().lower() 51 | 52 | if action == 'e': 53 | new_answer = input("Enter the new answer: ").strip() 54 | entry['_source']['answer'] = new_answer 55 | corrected_entries.append(entry) 56 | print("Entry updated.") 57 | index += 1 58 | elif action == 's': 59 | index += 1 60 | elif action == 'd': 61 | confirm_delete = input("Are you sure you want to delete this entry? (y/n): ").strip().lower() 62 | if confirm_delete == 'y': 63 | entry['_source'] = None # Mark for deletion 64 | corrected_entries.append(entry) 65 | print("Entry marked for deletion.") 66 | index += 1 67 | elif action == 'p': 68 | if index > 0: 69 | index -= 1 70 | else: 71 | print("You are at the first entry.") 72 | elif action == 'n': 73 | index += 1 74 | else: 75 | print("Invalid action. Please use n, p, s, e, or d.") 76 | 77 | return corrected_entries 78 | 79 | def apply_corrections(es, index_name, entries): 80 | actions = [] 81 | for entry in entries: 82 | if entry['_source'] is None: 83 | actions.append({ 84 | "_op_type": "delete", 85 | "_index": index_name, 86 | "_id": entry['_id'] 87 | }) 88 | else: 89 | actions.append({ 90 | "_op_type": "index", 91 | "_index": index_name, 92 | "_id": entry['_id'], 93 | "_source": entry['_source'] 94 | }) 95 | 96 | helpers.bulk(es, actions) 97 | print(f"Applied corrections to {len(entries)} entries in '{index_name}'.") 98 | 99 | # Fetch problematic entries 100 | problematic_entries = fetch_problematic_entries(es, index_name, problematic_answer) 101 | 102 | # Save backup of problematic entries 103 | save_backup(problematic_entries, backup_file) 104 | 105 | # Review and fix entries 106 | corrected_entries = review_and_fix_entries(problematic_entries) 107 | 108 | # Apply corrections to Elasticsearch 109 | apply_corrections(es, index_name, corrected_entries) 110 | -------------------------------------------------------------------------------- /docker_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # horizontal line 4 | function hzline() { printf '%*s\n' "${COLUMNS:-$(tput cols)}" '' | tr ' ' - ; } 5 | 6 | # Check if Docker is installed 7 | if ! 
[ -x "$(command -v docker)" ]; then
8 |     echo 'Error: Docker is not installed. Please install Docker first: https://www.docker.com/get-started/' >&2
9 |     exit 1
10 | fi
11 | 
12 | # Welcome Message
13 | echo "" &&
14 | hzline &&
15 | echo "::: Welcome to the TelegramBot-OpenAI-API setup." &&
16 | echo "::: Source code & repo: https://github.com/FlyingFathead/TelegramBot-OpenAI-API/" &&
17 | hzline &&
18 | echo
19 | 
20 | # Check if .env file already exists and prompt the user
21 | if [ -f .env ]; then
22 |     echo "Warning: A .env file already exists in this directory."
23 |     while true; do
24 |         read -p "Do you want to overwrite the existing .env file? (y/n): " yn
25 |         case $yn in
26 |             [Yy]* ) break;;
27 |             [Nn]* ) echo "Exiting setup without overwriting .env file."; exit 0;;
28 |             * ) echo "Please answer yes or no.";;
29 |         esac
30 |     done
31 | fi
32 | 
33 | # Function to check for empty or invalid inputs for required keys
34 | validate_input() {
35 |     if [[ -z "$1" || ${#1} -lt 10 ]]; then
36 |         echo "Error: Input cannot be blank or too short (must be at least 10 characters). Please try again."
37 |         return 1
38 |     fi
39 |     return 0
40 | }
41 | 
42 | # Prompt for required API keys (OpenAI and Telegram)
43 | while true; do
44 |     read -p "Please enter your OpenAI API key (required): " OPENAI_API_KEY
45 |     validate_input "$OPENAI_API_KEY" && break
46 | done
47 | 
48 | while true; do
49 |     read -p "Please enter your Telegram Bot API Token (required): " TELEGRAM_BOT_TOKEN
50 |     validate_input "$TELEGRAM_BOT_TOKEN" && break
51 | done
52 | 
53 | # Prompt for optional API keys (user can leave them blank)
54 | hzline &&
55 | echo "::: Below are optional keys for the bot's supported API functions." &&
56 | echo "::: They're not required for basic functionality, but are a great enhancement." &&
57 | echo "::: If you don't have an API key right now, just press ENTER to leave them blank." &&
58 | hzline &&
59 | read -p "Please enter your Perplexity API key (optional): " PERPLEXITY_API_KEY
60 | read -p "Please enter your OpenWeatherMap API key (optional): " OPENWEATHERMAP_API_KEY
61 | read -p "Please enter your WeatherAPI key (optional): " WEATHERAPI_KEY
62 | read -p "Please enter your MapTiler API key (optional): " MAPTILER_API_KEY
63 | read -p "Please enter your Openrouteservice API key (optional): " OPENROUTESERVICE_API_KEY
64 | 
65 | # Create a .env file with the required and optional keys
66 | hzline &&
67 | echo "Generating .env file..."
68 | cat <<EOL > .env
69 | OPENAI_API_KEY=$OPENAI_API_KEY
70 | TELEGRAM_BOT_TOKEN=$TELEGRAM_BOT_TOKEN
71 | OPENWEATHERMAP_API_KEY=$OPENWEATHERMAP_API_KEY
72 | WEATHERAPI_KEY=$WEATHERAPI_KEY
73 | MAPTILER_API_KEY=$MAPTILER_API_KEY
74 | OPENROUTESERVICE_API_KEY=$OPENROUTESERVICE_API_KEY
75 | PERPLEXITY_API_KEY=$PERPLEXITY_API_KEY
76 | # Additional variables can be added here
77 | EOL
78 | 
79 | echo "Environment variables saved to .env." &&
80 | hzline &&
81 | 
82 | # Instructions for the next steps
83 | echo
84 | echo "Next Steps:"
85 | echo "1. Build the Docker image by running the following command:"
86 | echo "   sudo docker build -t telegrambot-openai-api ."
87 | echo
88 | echo "2. After building the image, start the bot container using:"
89 | echo "   sudo docker run --env-file .env --name telegrambot-openai-api -d telegrambot-openai-api"
90 | echo
91 | echo "3. Check the container status with:"
92 | echo "   sudo docker ps"
93 | echo
94 | echo "4. Check the logs with:"
95 | echo "   sudo docker logs telegrambot-openai-api"
96 | echo
97 | echo "5. Stop the container with:"
98 | echo "   sudo docker stop <container_name>"
99 | echo
100 | echo "After that, you're all set! Enjoy, and don't forget to star the repository if you like it. :-)"
101 | hzline &&
102 | echo ""
103 | 
104 | # optional build & run function
105 | function build_and_run() {
106 |     # Build Docker image
107 |     sudo docker build -t telegrambot-openai-api .
108 |     if [[ $? -ne 0 ]]; then
109 |         echo "Error: Docker image build failed."
110 |         exit 1
111 |     fi
112 | 
113 |     # Run Docker container
114 |     sudo docker run --env-file .env -d telegrambot-openai-api
115 |     if [[ $? -ne 0 ]]; then
116 |         echo "Error: Failed to run the Docker container."
117 |         exit 1
118 |     fi
119 | }
120 | 
121 | # build_and_run
--------------------------------------------------------------------------------
/src/perplexity_handler.py:
--------------------------------------------------------------------------------
1 | # ATTN: this module is currently not in use as of v0.737.
2 | 
3 | # # perplexity_handler.py
4 | 
5 | # import logging
6 | # import json
7 | # from telegram.constants import ParseMode
8 | # from api_perplexity_search import query_perplexity, translate_response_chunked, split_message
9 | 
10 | # MAX_TELEGRAM_MESSAGE_LENGTH = 4000
11 | 
12 | # async def handle_query_perplexity(context, update, chat_id, function_call, user_message, bot, chat_history):
13 | #     arguments = json.loads(function_call.get('arguments', '{}'))
14 | #     question = arguments.get('question', '')
15 | 
16 | #     if not question:
17 | #         logging.warning("No question was provided for the Perplexity query.")
18 | #         await context.bot.send_message(
19 | #             chat_id=chat_id,
20 | #             text="No question was provided for the Perplexity query. Please provide a question.",
21 | #             parse_mode=ParseMode.HTML
22 | #         )
23 | #         return True
24 | 
25 | #     # Make the asynchronous API call to query Perplexity
26 | #     perplexity_response = await query_perplexity(context.bot, chat_id, question)
27 | 
28 | #     # Log the raw Perplexity API response for debugging
29 | #     logging.info(f"Raw Perplexity API Response: {perplexity_response}")
30 | 
31 | #     if perplexity_response == "[System message: Perplexity API is currently unavailable due to server issues. Inform the user about this issue in their language.]":
32 | #         # Handle the system message for API unavailability
33 | #         logging.error("Perplexity API is down. Informing the model to notify the user.")
34 | #         await context.bot.send_message(
35 | #             chat_id=chat_id,
36 | #             text="Perplexity API is currently unavailable due to server issues.
Please try again later.", 37 | # parse_mode=ParseMode.HTML 38 | # ) 39 | # return True 40 | 41 | # if perplexity_response is None: 42 | # logging.error("No valid response from Perplexity, Perplexity response was None or empty.") 43 | # await context.bot.send_message( 44 | # chat_id=chat_id, 45 | # text="No valid response from Perplexity, Perplexity response was None or empty.", 46 | # parse_mode=ParseMode.HTML 47 | # ) 48 | # return True 49 | 50 | # # Flag for translation in progress 51 | # context.user_data['active_translation'] = True 52 | 53 | # # Translate or process the response as necessary 54 | # bot_reply_formatted = await translate_response_chunked(bot, user_message, perplexity_response, context, update) 55 | 56 | # # After translation or processing is completed, clear the active translation flag 57 | # context.user_data.pop('active_translation', None) 58 | 59 | # if isinstance(bot_reply_formatted, bool) and bot_reply_formatted: # Check if translation function returned successfully 60 | # return True # Ensure function exits after handling success 61 | 62 | # if not bot_reply_formatted or bot_reply_formatted.startswith("Error"): 63 | # logging.error("Error processing or translating the Perplexity response.") 64 | # await context.bot.send_message( 65 | # chat_id=chat_id, 66 | # text="Error processing or translating the Perplexity response.", 67 | # parse_mode=ParseMode.HTML 68 | # ) 69 | # return True 70 | 71 | # # Append the bot's reply to the chat history before sending it 72 | # chat_history.append({"role": "assistant", "content": bot_reply_formatted}) 73 | # context.chat_data['chat_history'] = chat_history # Update the chat data with the new history 74 | 75 | # if len(bot_reply_formatted) > MAX_TELEGRAM_MESSAGE_LENGTH: 76 | # # Split the message into chunks if it exceeds the maximum length 77 | # chunks = split_message(bot_reply_formatted) 78 | 79 | # for chunk in chunks: 80 | # await context.bot.send_message( 81 | # chat_id=chat_id, 82 | # text=chunk, 83 | # parse_mode=ParseMode.HTML 84 | # ) 85 | # logging.info(f"Sent chunk with length: {len(chunk)}") 86 | # else: 87 | # await context.bot.send_message( 88 | # chat_id=chat_id, 89 | # text=bot_reply_formatted, 90 | # parse_mode=ParseMode.HTML 91 | # ) 92 | # logging.info(f"Sent message with length: {len(bot_reply_formatted)}") 93 | 94 | # logging.info("Response sent successfully, no further actions should be triggered.") 95 | # return True 96 | -------------------------------------------------------------------------------- /src/api_get_maptiler.py: -------------------------------------------------------------------------------- 1 | # api_get_maptiler.py 2 | 3 | import logging 4 | import httpx 5 | import os 6 | 7 | # the function below can be implemented to use for POI lookups 8 | async def get_location_from_coordinates(latitude, longitude): 9 | logging.info(f"Fetching location information for coordinates: Latitude: {latitude}, Longitude: {longitude}") 10 | # Retrieve MapTiler API key from environment variables 11 | api_key = os.getenv('MAPTILER_API_KEY') 12 | if not api_key: 13 | logging.info("[WARNING] MapTiler API key not set. You need to set the 'MAPTILER_API_KEY' environment variable for this function to work!") 14 | return "MapTiler API key not set." 
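    # (Heads-up: MapTiler's geocoding endpoint expects lon,lat order, which is
    # why the URL below interpolates {longitude} before {latitude}.)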
15 | 16 | # Construct the API request URL for reverse geocoding 17 | reverse_geocode_url = f"https://api.maptiler.com/geocoding/{longitude},{latitude}.json?key={api_key}" 18 | logging.info(f"Making API request to URL: {reverse_geocode_url}") 19 | 20 | async with httpx.AsyncClient() as client: 21 | response = await client.get(reverse_geocode_url) 22 | logging.info(f"Received response with status code: {response.status_code}") 23 | 24 | if response.status_code == 200: 25 | data = response.json() 26 | logging.info(f"Response data: {data}") 27 | # Process the response data to extract useful information 28 | # For example, you might extract the nearest city name, points of interest, etc. 29 | # Return this information 30 | return data 31 | else: 32 | logging.info(f"Failed to fetch location information: {response.text}") 33 | return "Failed to fetch location information." 34 | 35 | # this function can look up coordinates from a given address 36 | async def get_coordinates_from_address(address): 37 | logging.info(f"Fetching coordinates for address: {address}") 38 | # Retrieve MapTiler API key from environment variables 39 | api_key = os.getenv('MAPTILER_API_KEY') 40 | if not api_key: 41 | logging.error("[ERROR] MapTiler API key not set. You need to set the 'MAPTILER_API_KEY' environment variable for this function to work!") 42 | return "MapTiler API key not set." 43 | 44 | # Construct the API request URL for geocoding 45 | geocode_url = f"https://api.maptiler.com/geocoding/{address}.json?key={api_key}" 46 | logging.info(f"Making API request to URL: {geocode_url}") 47 | 48 | async with httpx.AsyncClient() as client: 49 | response = await client.get(geocode_url) 50 | logging.info(f"Received response with status code: {response.status_code}") 51 | 52 | if response.status_code == 200: 53 | data = response.json() 54 | logging.info(f"Response data: {data}") 55 | # Assuming the first feature is the most relevant match 56 | if data['features']: 57 | first_feature = data['features'][0] 58 | coordinates = first_feature['geometry']['coordinates'] 59 | # Coordinates are returned as [longitude, latitude] 60 | return {'longitude': coordinates[0], 'latitude': coordinates[1]} 61 | else: 62 | logging.info("No features found for the provided address.") 63 | return "No location found for the provided address." 64 | else: 65 | logging.error(f"Failed to fetch coordinates: {response.text}") 66 | return "Failed to fetch coordinates." 67 | 68 | # get a map image (for maptiler's paid plan only) 69 | async def get_static_map_image(latitude, longitude, zoom, width, height, mapId='streets'): 70 | api_key = os.getenv('MAPTILER_API_KEY') 71 | if not api_key: 72 | logging.error("[ERROR] MapTiler API key not set.") 73 | return "MapTiler API key not set." 
74 | 75 | scale = '@2x' # For HiDPI/Retina maps 76 | format = 'png' # Output format 77 | url = f"https://api.maptiler.com/maps/{mapId}/static/{longitude},{latitude},{zoom}/{width}x{height}{scale}.{format}?key={api_key}" 78 | 79 | async with httpx.AsyncClient() as client: 80 | response = await client.get(url) 81 | if response.status_code == 200: 82 | # Save the image to a file for debugging 83 | with open('map_image.png', 'wb') as f: 84 | f.write(response.content) 85 | logging.info("Static map image saved successfully.") 86 | return response.content # Returns the image data 87 | else: 88 | logging.error(f"Failed to generate static map: Status code {response.status_code}") 89 | return None 90 | -------------------------------------------------------------------------------- /src/api_key.py: -------------------------------------------------------------------------------- 1 | # api_key.py 2 | # Read the OPENAI API key with configurable fallback 3 | 4 | import os 5 | import sys 6 | import configparser 7 | import logging 8 | from config_paths import CONFIG_PATH, API_TOKEN_PATH # Import the centralized CONFIG_PATH 9 | 10 | # Set up basic logging 11 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 12 | 13 | # Flag to enable or disable fallback to environment variable if the key is not found in the file 14 | ENABLE_KEY_READING_FALLBACK = True 15 | 16 | def read_env_api_key(): 17 | """ 18 | Reads the OpenAI API key from the environment variable. 19 | 20 | Returns: 21 | str: The API key if found, else None. 22 | """ 23 | api_key = os.getenv('OPENAI_API_KEY') 24 | if api_key: 25 | logging.info("OpenAI API key loaded from environment variable.") 26 | return api_key 27 | 28 | def get_api_key(config_path=CONFIG_PATH, token_file=API_TOKEN_PATH): 29 | """ 30 | Retrieves the OpenAI API key, prioritizing the method as per the config file or defaults. 31 | 32 | Args: 33 | config_path (str): Path to the configuration file. 34 | token_file (str): Path to the file containing the API key. 35 | 36 | Returns: 37 | str: The OpenAI API key. 38 | 39 | Raises: 40 | SystemExit: If the API key is not found through any method. 41 | """ 42 | config = configparser.ConfigParser() 43 | api_key = None 44 | 45 | try: 46 | config.read(config_path) 47 | if not config.sections(): 48 | logging.warning(f"Config file '{config_path}' is missing or empty. 
OpenAI API key reading falling back to environment variable preference.") 49 | prefer_env = True # Defaulting to True if config read fails 50 | else: 51 | prefer_env = config.getboolean('DEFAULT', 'PreferEnvForAPIKey', fallback=True) 52 | logging.info(f"Preference for environment variables for the OpenAI API key set in config: {'Yes' if prefer_env else 'No'}") 53 | except Exception as e: 54 | logging.error(f"Failed to read OpenAI API key from config file: {e}") 55 | prefer_env = True # Defaulting to True if config read fails 56 | logging.info("Defaulting to environment variable preference due to config read failure.") 57 | 58 | if prefer_env: 59 | api_key = read_env_api_key() 60 | if api_key: 61 | return api_key.strip() 62 | 63 | if not api_key: 64 | try: 65 | with open(token_file, 'r') as file: 66 | api_key = file.read().strip() 67 | if api_key: 68 | logging.info("OpenAI API key loaded from file.") 69 | return api_key 70 | except FileNotFoundError: 71 | logging.warning("OpenAI API token file not found.") 72 | if not prefer_env and ENABLE_KEY_READING_FALLBACK: 73 | api_key = read_env_api_key() 74 | if api_key: 75 | return api_key.strip() 76 | 77 | if not api_key: 78 | logging.error("The OPENAI_API_KEY environment variable is not set, and `api_token.txt` was not found. Please set either one and adjust `config.ini` if needed for the preferred load order.") 79 | sys.exit(1) 80 | 81 | # Example usage for standalone testing 82 | if __name__ == "__main__": 83 | api_key = get_api_key() 84 | print("OpenAI API Key (for testing & debugging only):", api_key) 85 | 86 | # ~~~ old method below ~~~ 87 | # import os 88 | # import sys 89 | # import configparser 90 | 91 | # # set `prefer_env` to `True` if you wish to prioritize the environment variable over the configuration text file 92 | # # (determines load order) 93 | # def get_api_key(): 94 | # config = configparser.ConfigParser() 95 | # config.read('config.ini') 96 | # prefer_env = config.getboolean('DEFAULT', 'PreferEnvForAPIKey', fallback=True) 97 | 98 | # if prefer_env: 99 | # api_key = os.getenv('OPENAI_API_KEY') 100 | # if api_key is not None: 101 | # return api_key 102 | 103 | # try: 104 | # with open('api_token.txt', 'r') as file: 105 | # return file.read().strip() 106 | # except FileNotFoundError: 107 | # if not prefer_env: 108 | # api_key = os.getenv('OPENAI_API_KEY') 109 | # if api_key is not None: 110 | # return api_key 111 | 112 | # print("The OPENAI_API_KEY environment variable is not set, and `api_token.txt` was not found. Please set either one and adjust `config.ini` if needed for the preferred load order.") 113 | # sys.exit(1) 114 | -------------------------------------------------------------------------------- /src/calc_module.py: -------------------------------------------------------------------------------- 1 | # calc_module.py 2 | # 3 | # From: 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | # https://github.com/FlyingFathead/TelegramBot-OpenAI-API 6 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 7 | # (updated Oct 13, 2024) 8 | 9 | import ast 10 | import operator 11 | import logging 12 | import re 13 | 14 | # Initialize the logger 15 | logger = logging.getLogger(__name__) 16 | 17 | # Below are some safety measures so that the outputs aren't absolutely insane in length. 
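# (For example, evaluating 9**9**9 would try to materialize an integer with
#  roughly 370 million digits; without these caps a single chat request could
#  stall the bot before the result is even rejected.)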
18 | # Define maximum allowed length for the result and maximum magnitude 19 | MAX_OUTPUT_LENGTH = 500 # Adjust as necessary 20 | MAX_MAGNITUDE = 1e100 # Example maximum magnitude 21 | 22 | def preprocess_expression(expression: str) -> str: 23 | """ 24 | Preprocess the input expression to handle natural language constructs like 'of' and percentages. 25 | For example, convert '0.1% of 200000000' to '0.1 / 100 * 200000000'. 26 | """ 27 | # Handle 'of' by replacing it with '*' 28 | expression = re.sub(r'\bof\b', '*', expression, flags=re.IGNORECASE) 29 | 30 | # Handle percentages: convert 'X%' to '(X/100)' 31 | expression = re.sub(r'(\d+(\.\d+)?)\s*%', r'(\1/100)', expression) 32 | 33 | logger.debug(f"Preprocessed expression: {expression}") 34 | return expression 35 | 36 | def safe_eval(expression: str): 37 | # Replace '^' with '**' for exponentiation 38 | expression = expression.replace('^', '**') 39 | 40 | allowed_operators = { 41 | ast.Add: operator.add, 42 | ast.Sub: operator.sub, 43 | ast.Mult: operator.mul, 44 | ast.Div: operator.truediv, 45 | ast.Mod: operator.mod, 46 | ast.Pow: operator.pow 47 | } 48 | 49 | def _eval(node): 50 | if isinstance(node, ast.BinOp): 51 | if type(node.op) in allowed_operators: 52 | left = _eval(node.left) 53 | right = _eval(node.right) 54 | op_func = allowed_operators[type(node.op)] 55 | result = op_func(left, right) 56 | 57 | # Logging the operation being performed 58 | logger.debug(f"Evaluating: {left} {type(node.op).__name__} {right} = {result}") 59 | 60 | # Check if the result is within acceptable magnitude 61 | if abs(result) > MAX_MAGNITUDE: 62 | error_msg = f"Result magnitude exceeds the maximum allowed limit: {result}" 63 | logger.error(error_msg) 64 | raise ValueError(error_msg) 65 | 66 | return result 67 | else: 68 | error_msg = f"Unsupported operation: {type(node.op).__name__}" 69 | logger.error(error_msg) 70 | raise ValueError(error_msg) 71 | elif isinstance(node, ast.Num): 72 | logger.debug(f"Numeric literal: {node.n}") 73 | return node.n 74 | elif isinstance(node, ast.Expression): 75 | return _eval(node.body) 76 | elif isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)): 77 | operand = _eval(node.operand) 78 | if isinstance(node.op, ast.UAdd): 79 | return +operand 80 | elif isinstance(node.op, ast.USub): 81 | return -operand 82 | else: 83 | error_msg = f"Unsupported type: {type(node).__name__}" 84 | logger.error(error_msg) 85 | raise ValueError(error_msg) 86 | 87 | try: 88 | node = ast.parse(expression, mode='eval') 89 | logger.info(f"Parsed expression: {expression}") 90 | return _eval(node.body) 91 | except Exception as e: 92 | logger.exception(f"Error parsing or evaluating expression: {expression}") 93 | raise 94 | 95 | async def calculate_expression(expression: str): 96 | logger.info(f"Calculating expression: {expression}") 97 | try: 98 | # Preprocess the expression to handle 'of' and '%' 99 | processed_expression = preprocess_expression(expression) 100 | 101 | result = safe_eval(processed_expression) 102 | 103 | # Check if the result length is within limits 104 | result_str = str(result) 105 | if len(result_str) > MAX_OUTPUT_LENGTH: 106 | error_message = f"Result exceeds the maximum allowed length of {MAX_OUTPUT_LENGTH} characters." 107 | logger.error(error_message) 108 | return error_message 109 | 110 | # Construct the success message 111 | result_message = f"The result of {expression} is {result}." 
112 | logger.info(f"Calculation successful: {result_message}") 113 | return result_message 114 | except ValueError as ve: 115 | # Specific handling for ValueError (e.g., unsupported operations) 116 | error_message = f"Error evaluating expression `{expression}`: {str(ve)}" 117 | logger.error(error_message) 118 | return error_message 119 | except Exception as e: 120 | # General error handling 121 | error_message = f"An unexpected error occurred while evaluating `{expression}`: {str(e)}" 122 | logger.error(error_message) 123 | return error_message 124 | -------------------------------------------------------------------------------- /src/api_get_website_dump.py: -------------------------------------------------------------------------------- 1 | # api_get_website_dump.py 2 | 3 | import urllib.parse 4 | import subprocess 5 | import logging 6 | import tiktoken # for token counting 7 | import sys 8 | import asyncio 9 | import re 10 | 11 | # Configuration 12 | USE_DOMAIN_RESTRICTIONS = False # Flag to enable or disable domain restriction logic 13 | ALLOW_ONLY = True # If True, only allowed domains are permitted. If False, only disallowed domains are blocked. 14 | 15 | ALLOWED_DOMAINS = [ 16 | '*.fi', # Allow all .fi domains 17 | 'google.com', # Allow google.com and all subdomains 18 | 'openai.com', # Allow openai.com and all subdomains 19 | ] 20 | 21 | DISALLOWED_DOMAINS = [ 22 | # Add specific domains or patterns you want to disallow, if any 23 | ] 24 | 25 | # check if the domain is allowed or not 26 | def is_domain_allowed(url): 27 | if not USE_DOMAIN_RESTRICTIONS: 28 | logging.warning("Domain restrictions are NOT in use. All domains are allowed.") 29 | return True # If restrictions are not used, allow all domains 30 | 31 | parsed_url = urllib.parse.urlparse(url) 32 | domain = parsed_url.netloc 33 | 34 | if ALLOW_ONLY: 35 | # In "allow only" mode, allow only domains in ALLOWED_DOMAINS 36 | for allowed in ALLOWED_DOMAINS: 37 | if re.fullmatch(allowed.replace('*', '.*'), domain): 38 | return True 39 | logging.warning(f"Domain not allowed: {domain}") 40 | return False 41 | else: 42 | # In "disallow only" mode, disallow only domains in DISALLOWED_DOMAINS 43 | for disallowed in DISALLOWED_DOMAINS: 44 | if re.fullmatch(disallowed.replace('*', '.*'), domain): 45 | logging.warning(f"Disallowed domain: {domain}") 46 | return False 47 | return True # Allow all other domains if not disallowed 48 | 49 | # get the website dump 50 | async def get_website_dump(url, max_tokens=10000): 51 | """ 52 | Fetches the content of a website using lynx --dump and returns it as a string. 53 | Ensures the content doesn't exceed the specified max token count. 54 | Cleans up unnecessary content and retains meaningful newlines. 
55 | """ 56 | 57 | # Check if the domain is allowed 58 | if not is_domain_allowed(url): 59 | error_message = f"Error: Cannot browse the address, not allowed for URL: {url}" 60 | logging.error(error_message) 61 | return error_message 62 | 63 | try: 64 | # Execute the lynx command to fetch the website content 65 | result = subprocess.run(['lynx', '--dump', url], capture_output=True, text=True, timeout=15) 66 | 67 | # Check if the command was successful 68 | if result.returncode == 0: 69 | content = result.stdout 70 | 71 | # Filter out non-informative content using regex 72 | # content = re.sub(r'\[.*?\]|\(BUTTON\)|\s{2,}', ' ', content) # Remove links, buttons, and excessive spaces 73 | 74 | # Replace multiple spaces and tabs with a single space 75 | content = re.sub(r'\s+', ' ', content) 76 | 77 | # Keep meaningful newlines (keep single newlines, avoid empty lines) 78 | content = re.sub(r'\s*\n\s*', '\n', content) # Clean up newlines 79 | content = re.sub(r'\n{2,}', '\n', content) # Ensure no multiple consecutive newlines 80 | 81 | # Use the correct encoding for GPT-4o 82 | enc = tiktoken.encoding_for_model("gpt-4o") # Load the appropriate tokenizer for GPT-4o 83 | tokens = enc.encode(content) 84 | 85 | # Log the fetched content and token count 86 | logging.info(f"Upon user's request, fetched content from: {url}") 87 | logging.info(f"Token count: {len(tokens)}") 88 | 89 | # If the token count exceeds the max_tokens, truncate the content 90 | if len(tokens) > max_tokens: 91 | # Trim tokens to fit within the max_tokens 92 | tokens = tokens[:max_tokens] 93 | # Decode the trimmed tokens back to text 94 | content = enc.decode(tokens) 95 | logging.info(f"Content truncated to {max_tokens} tokens.") 96 | 97 | return content.strip() 98 | else: 99 | error_message = f"Error: Unable to fetch content from {url}. Return code: {result.returncode}" 100 | logging.error(error_message) 101 | return error_message 102 | 103 | except subprocess.TimeoutExpired: 104 | error_message = f"Error: Timed out while trying to fetch content from {url}." 
105 |         logging.error(error_message)
106 |         return error_message
107 | 
108 |     except Exception as e:
109 |         error_message = f"Error: An exception occurred while fetching content from {url}: {str(e)}"
110 |         logging.error(error_message)
111 |         return error_message
112 | 
113 | # Tester to run the script directly
114 | if __name__ == "__main__":
115 |     if len(sys.argv) != 2:
116 |         print("Usage: python api_get_website_dump.py <url>")
117 |         sys.exit(1)
118 | 
119 |     url = sys.argv[1]
120 | 
121 |     # Set up basic logging to console
122 |     logging.basicConfig(level=logging.INFO)
123 | 
124 |     # Run the function and print the result
125 |     result = asyncio.run(get_website_dump(url))
126 |     print(result)
127 | 
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | # utils.py
2 | import os
3 | import re
4 | import shutil
5 | import sys
6 | import datetime
7 | from functools import partial
8 | import asyncio
9 | from concurrent.futures import ThreadPoolExecutor
10 | from pydub import AudioSegment
11 | import json
12 | import httpx
13 | import openai
14 | 
15 | # Elasticsearch checks
16 | from config_paths import (
17 |     ELASTICSEARCH_ENABLED, ELASTICSEARCH_HOST, ELASTICSEARCH_PORT,
18 |     ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD
19 | )
20 | 
21 | # Finnish translations of holiday names
22 | holiday_replacements = {
23 |     "New Year's Day": "uudenvuodenpäivä (New Year's Day)",
24 |     "Epiphany": "loppiainen (Epiphany)",
25 |     "Good Friday": "pitkäperjantai (Good Friday)",
26 |     "Easter Sunday": "pääsiäispäivä (Easter Sunday)",
27 |     "Easter Monday": "2. pääsiäispäivä (Easter Monday)",
28 |     "May Day": "vappu (May Day)",
29 |     "Ascension Day": "helatorstai (Ascension Day)",
30 |     "Whit Sunday": "helluntaipäivä (Whit Sunday)",
31 |     "Midsummer Eve": "juhannusaatto (Midsummer Eve)",
32 |     "Midsummer Day": "juhannuspäivä (Midsummer Day)",
33 |     "All Saints' Day": "pyhäinpäivä (All Saints' Day)",
34 |     "Independence Day": "itsenäisyyspäivä (Independence Day)",
35 |     "Christmas Eve": "jouluaatto (Christmas Eve)",
36 |     "Christmas Day": "joulupäivä (Christmas Day)",
37 |     "Second Day of Christmas": "Tapaninpäivä (Second Day of Christmas)",
38 |     "New Year's Eve": "uudenvuodenaatto (New Year's Eve)",
39 |     "May Day Eve": "vappuaatto (May Day Eve)"
40 | }
41 | 
42 | # set `now`
43 | now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
44 | 
45 | # print term width horizontal line
46 | def hz_line(character='-'):
47 |     terminal_width = shutil.get_terminal_size().columns
48 |     line = character * terminal_width
49 |     print(line)
50 |     sys.stdout.flush()  # Flush the output to the terminal immediately
51 | 
52 | # print the startup message
53 | def print_startup_message(version_number):
54 |     now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
55 |     hz_line()
56 |     print(f"::: [{now}] Telegram bot (powered by ChatKeke) v.{version_number} starting up...", flush=True)
57 |     # Print Elasticsearch status
58 |     print(f"::: Elasticsearch enabled: {ELASTICSEARCH_ENABLED}", flush=True)
59 |     hz_line()
60 | 
61 | # remove html tags
62 | def remove_html_tags(text):
63 |     """Remove html tags from a string"""
64 |     clean = re.compile('<.*?>')
65 |     return re.sub(clean, '', text)
66 | 
67 | # escape markdown v2, v0.12 [currently not in use because this is a ... 
it's a thing] 68 | def escape_markdown_v2(text): 69 | 70 | # Escape MarkdownV2 special characters 71 | def escape_special_chars(m): 72 | char = m.group(0) 73 | # Escape all special characters with a backslash, except for asterisks and underscores 74 | if char in ('_', '*', '`'): 75 | # These are used for formatting and shouldn't be escaped. 76 | return char 77 | return '\\' + char 78 | 79 | # First, we'll handle the code blocks by temporarily removing them 80 | code_blocks = re.findall(r'```.*?```', text, re.DOTALL) 81 | code_placeholders = [f"CODEBLOCK{i}" for i in range(len(code_blocks))] 82 | for placeholder, block in zip(code_placeholders, code_blocks): 83 | text = text.replace(block, placeholder) 84 | 85 | # Now we escape the special characters outside of the code blocks 86 | text = re.sub(r'([[\]()~>#+\-=|{}.!])', escape_special_chars, text) 87 | 88 | # We convert **bold** and *italic* (or _italic_) syntax to Telegram's MarkdownV2 syntax 89 | # Bold: **text** to *text* 90 | text = re.sub(r'\*\*(.+?)\*\*', r'*\1*', text) 91 | # Italic: *text* or _text_ to _text_ (if not part of a code block) 92 | text = re.sub(r'\b_(.+?)_\b', r'_\1_', text) 93 | text = re.sub(r'\*(.+?)\*', r'_\1_', text) 94 | 95 | # Restore the code blocks 96 | for placeholder, block in zip(code_placeholders, code_blocks): 97 | text = text.replace(placeholder, block) 98 | 99 | return text 100 | 101 | # Calculate the total size of files in the specified directory. 102 | def get_directory_size(path: str) -> int: 103 | total_size = 0 104 | for dirpath, dirnames, filenames in os.walk(path): 105 | for f in filenames: 106 | fp = os.path.join(dirpath, f) 107 | total_size += os.path.getsize(fp) 108 | return total_size 109 | 110 | # Cleanup the oldest files in the specified directory when storage limit is exceeded. 111 | def cleanup_data_directory(path: str, max_storage_mb: int): 112 | files = [os.path.join(path, f) for f in os.listdir(path)] 113 | files.sort(key=lambda x: os.path.getmtime(x)) 114 | 115 | while get_directory_size(path) >= max_storage_mb * 1024 * 1024 and files: 116 | os.remove(files.pop(0)) # Remove the oldest file 117 | 118 | # examine an audio file's length (for WhisperAPI transcriptions) 119 | # ~ 120 | # This function doesn't inherently need to be async, as pydub's processing is synchronous. 121 | # However, if you're performing asynchronous file I/O or need to integrate with other async code, it can be async. 
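# A minimal standalone sketch of calling it (assumes an .ogg file exists at the
# given path; pydub itself needs ffmpeg available on the system):
#
#   minutes = asyncio.run(get_voice_message_duration("data/sample.ogg"))
#   print(f"Voice message length: {minutes:.2f} min")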
122 | # when in async mode 123 | executor = ThreadPoolExecutor(10) # Adjust the number of workers based on your needs 124 | # the function 125 | async def get_voice_message_duration(voice_file_path): 126 | loop = asyncio.get_running_loop() 127 | audio = await loop.run_in_executor(executor, AudioSegment.from_file, voice_file_path) 128 | duration_seconds = len(audio) / 1000 129 | duration_minutes = duration_seconds / 60 130 | return duration_minutes 131 | -------------------------------------------------------------------------------- /src/reminder_poller.py: -------------------------------------------------------------------------------- 1 | # src/reminder_poller.py 2 | 3 | import asyncio 4 | import logging 5 | import configparser 6 | from datetime import datetime, timezone # Import timezone 7 | 8 | # --- Corrected Imports --- 9 | from config_paths import CONFIG_PATH, REMINDERS_DB_PATH 10 | import db_utils 11 | from telegram.ext import Application 12 | from telegram.error import Forbidden, BadRequest 13 | from telegram.constants import ParseMode 14 | 15 | # load and use logger 16 | logger = logging.getLogger(__name__) 17 | logger.setLevel(logging.INFO) 18 | 19 | # Load configuration 20 | config = configparser.ConfigParser() 21 | config.read(CONFIG_PATH) 22 | 23 | # Read configuration safely 24 | try: 25 | POLLING_INTERVAL = config.getint('Reminders', 'PollingIntervalSeconds', fallback=60) # Default to 60s 26 | REMINDERS_ENABLED = config.getboolean('Reminders', 'EnableReminders', fallback=False) 27 | except configparser.NoSectionError: 28 | logger.warning("[Reminders] section missing in config.ini, using defaults (Polling=60s, Enabled=False)") 29 | POLLING_INTERVAL = 60 30 | REMINDERS_ENABLED = False 31 | except ValueError: 32 | logger.error("Invalid non-integer value for PollingIntervalSeconds in config.ini. Using default 60s.") 33 | POLLING_INTERVAL = 60 34 | REMINDERS_ENABLED = config.getboolean('Reminders', 'EnableReminders', fallback=False) # Still try to read enable flag 35 | 36 | # split to fit to telegram's msg length 37 | MAX_TG_MSG_LENGTH = 4096 38 | 39 | def split_long_message(message, max_length=MAX_TG_MSG_LENGTH): 40 | """ 41 | Splits a message into multiple parts, each up to max_length characters, 42 | and returns a list of parts. 43 | """ 44 | parts = [] 45 | start_index = 0 46 | while start_index < len(message): 47 | # Slice out a chunk of up to 'max_length' characters 48 | part = message[start_index:start_index + max_length] 49 | parts.append(part) 50 | start_index += max_length 51 | return parts 52 | 53 | # --- Corrected Function Signature --- 54 | async def reminder_poller(application: Application): 55 | """Periodically checks for due reminders and sends notifications.""" 56 | 57 | # Check if the feature is enabled right at the start 58 | if not REMINDERS_ENABLED: 59 | logger.info("Reminder Poller exiting: Feature disabled in config.ini.") 60 | return # Stop the poller task if disabled 61 | 62 | # Check if the database was initialized successfully 63 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 64 | logger.error("Reminder Poller exiting: DB was not initialized successfully.") 65 | return 66 | 67 | logger.info(f"Reminder poller started. 
Checking every {POLLING_INTERVAL} seconds.") 68 | 69 | while True: 70 | try: 71 | # --- Get Current Time --- 72 | now_utc_str = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') 73 | 74 | # --- Fetch due reminders using the correct DB path and time --- 75 | due_reminders = db_utils.get_due_reminders(REMINDERS_DB_PATH, now_utc_str) 76 | 77 | if due_reminders: 78 | logger.info(f"Found {len(due_reminders)} due reminders.") 79 | for r in due_reminders: 80 | reminder_id = r['reminder_id'] 81 | user_id = r['user_id'] 82 | chat_id = r['chat_id'] 83 | raw_text = r['reminder_text'] 84 | 85 | # The text you'd like to send (with an optional emoji, etc.) 86 | msg = f"🔔 {raw_text}" 87 | 88 | # 1) Split into multiple parts if over 4k 89 | msg_parts = split_long_message(msg) 90 | 91 | try: 92 | # 2) Send each part in a separate message 93 | for part in msg_parts: 94 | await application.bot.send_message( 95 | chat_id=chat_id, 96 | text=part, 97 | parse_mode=ParseMode.HTML 98 | ) 99 | 100 | # 3) Mark the reminder as sent 101 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'sent') 102 | logger.info(f"Sent reminder {reminder_id} to chat {chat_id} for user {user_id}.") 103 | 104 | # --- Specific Error Handling --- 105 | except Forbidden: 106 | logger.warning(f"Failed sending reminder {reminder_id} to chat {chat_id}. Bot forbidden (blocked?).") 107 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'failed_forbidden') 108 | except BadRequest as e: 109 | logger.error(f"Failed sending reminder {reminder_id} to chat {chat_id}. Bad request (chat not found?): {e}") 110 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'failed_bad_request') 111 | except Exception as e: 112 | logger.error(f"Unexpected error sending reminder {reminder_id} to chat {chat_id}: {e}") 113 | # Decide: update status to 'failed_unknown' or leave 'pending' to retry? 114 | # Let's mark as failed for now to avoid potential spamming if the error persists. 
115 | db_utils.update_reminder_status(REMINDERS_DB_PATH, reminder_id, 'failed_unknown') 116 | else: 117 | logger.debug("No reminders due.") 118 | 119 | except Exception as e: 120 | logger.error(f"Error in reminder polling loop: {e}") 121 | # Avoid crashing the poller, wait before next cycle 122 | await asyncio.sleep(POLLING_INTERVAL) # Still wait even if there was an error fetching 123 | 124 | # Wait for the next polling interval 125 | await asyncio.sleep(POLLING_INTERVAL) -------------------------------------------------------------------------------- /src/api_get_openrouteservice.py: -------------------------------------------------------------------------------- 1 | # api_get_openrouteservice.py 2 | 3 | import os 4 | import httpx 5 | import logging 6 | import json 7 | import openai 8 | 9 | # Function to retrieve the OpenRouteService API key 10 | def get_openrouteservice_api_key(): 11 | api_key = os.getenv('OPENROUTESERVICE_API_KEY') 12 | if not api_key: 13 | logging.error("OpenRouteService API key not set.") 14 | return None 15 | return api_key 16 | 17 | # Async function to get geographic coordinates from an address 18 | async def geocode_address(address, api_key): 19 | base_url = 'https://api.openrouteservice.org/geocode/search' 20 | params = { 21 | 'api_key': api_key, 22 | 'text': address 23 | } 24 | async with httpx.AsyncClient() as client: 25 | response = await client.get(base_url, params=params) 26 | if response.status_code == 200: 27 | data = response.json() 28 | # Assumes the first feature is the most relevant match 29 | geometry = data['features'][0]['geometry'] 30 | return geometry['coordinates'] 31 | else: 32 | logging.error(f"Geocoding error: {response.text}") 33 | return None 34 | 35 | # async function to get directions 36 | async def get_route(start_coords, end_coords, profile="driving-car", format="json"): 37 | api_key = get_openrouteservice_api_key() 38 | if not api_key: 39 | return "OpenRouteService API key not set." 40 | 41 | base_url = f'https://api.openrouteservice.org/v2/directions/{profile}/{format}' 42 | headers = { 43 | 'Authorization': api_key, 44 | 'Content-Type': 'application/json', 45 | } 46 | body = { 47 | 'coordinates': [start_coords, end_coords], # Correct format for coordinates 48 | } 49 | 50 | async with httpx.AsyncClient() as client: 51 | response = await client.post(base_url, headers=headers, json=body) 52 | 53 | if response.status_code == 200: 54 | route_data = response.json() 55 | logging.info(f"API Response: {response.json()}") 56 | directions = format_route(route_data) 57 | return directions 58 | else: 59 | error_message = f"Failed to get directions. API error cause: {response.text}" 60 | logging.error(error_message) 61 | return error_message 62 | 63 | # Function to format the routing data into a user-friendly message 64 | def format_route(data): 65 | # Assuming 'routes' is the correct key and contains the expected data 66 | if 'routes' in data and len(data['routes']) > 0: 67 | # Assuming the first route and its first segment are what we're interested in 68 | steps = data['routes'][0]['segments'][0]['steps'] 69 | instructions = [step['instruction'] for step in steps] 70 | return ' '.join(instructions) 71 | else: 72 | logging.error("Missing 'routes', 'segments', or 'steps' in API response.") 73 | return "Error: API response is missing required information." 
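# For reference, format_route() above expects the ORS v2 directions payload to
# look roughly like this (a trimmed, hypothetical sample -- only the keys the
# code actually reads are shown):
#
#   {
#     "routes": [
#       {"segments": [{"steps": [{"instruction": "Head north on Main St"}]}]}
#     ]
#   }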
74 | 75 | # Function that wraps the geocoding of two addresses and getting the route between them 76 | async def get_directions_from_addresses(start_address, end_address, profile="driving-car"): 77 | api_key = get_openrouteservice_api_key() 78 | if not api_key: 79 | return "OpenRouteService API key not set." 80 | 81 | start_coords = await geocode_address(start_address, api_key) 82 | end_coords = await geocode_address(end_address, api_key) 83 | 84 | if start_coords and end_coords: 85 | return await get_route(start_coords, end_coords, profile) 86 | else: 87 | return "Could not geocode one or both of the addresses. Please ask the user to clarify." 88 | 89 | # Format the directions information and translate it if necessary. 90 | async def format_and_translate_directions(bot, user_request, directions_info): 91 | # System message to instruct the model 92 | format_translate_system_message = { 93 | "role": "system", 94 | "content": "Format the incoming data into a human readable format. Translate if needed (depending on user's language) and format the data into a digestible Telegram message with emoji symbols and HTML parse mode tags. Use i.e. Directions etc. Respond in user's original language!" 95 | } 96 | 97 | # Prepare chat history with the user's request, system message, and directions info 98 | chat_history = [ 99 | {"role": "user", "content": user_request}, 100 | format_translate_system_message, 101 | {"role": "assistant", "content": directions_info} 102 | ] 103 | 104 | # Prepare the payload for the OpenAI API 105 | payload = { 106 | "model": bot.model, 107 | "messages": chat_history, 108 | "temperature": 0.5 109 | } 110 | 111 | headers = { 112 | "Content-Type": "application/json", 113 | "Authorization": f"Bearer {openai.api_key}" 114 | } 115 | 116 | # Make the API request 117 | async with httpx.AsyncClient() as client: 118 | response = await client.post("https://api.openai.com/v1/chat/completions", 119 | data=json.dumps(payload), 120 | headers=headers, 121 | timeout=bot.timeout) 122 | response_json = response.json() 123 | 124 | # Extract the formatted and potentially translated response 125 | if response.status_code == 200 and 'choices' in response_json: 126 | translated_reply = response_json['choices'][0]['message']['content'].strip() 127 | bot_token_count = bot.count_tokens(translated_reply) # Count the tokens in the translated reply 128 | bot.total_token_usage += bot_token_count # Add to the total token usage 129 | bot.write_total_token_usage(bot.total_token_usage) # Update the total token usage file 130 | logging.info(f"Sent this directions report to user: {translated_reply}") 131 | return translated_reply 132 | else: 133 | logging.error("Error in formatting and translating directions data.") 134 | return directions_info # Return the original directions info in case of error 135 | -------------------------------------------------------------------------------- /src/config_paths.py: -------------------------------------------------------------------------------- 1 | # config_paths.py 2 | 3 | import os 4 | from pathlib import Path 5 | import configparser 6 | import logging 7 | 8 | # Initialize the logger for this module 9 | logger = logging.getLogger('TelegramBotLogger') # Ensure that 'TelegramBotLogger' is initialized in main.py 10 | 11 | # Define the base directory (the parent of the 'src' directory) 12 | BASE_DIR = Path(__file__).resolve().parents[1] 13 | 14 | # Path to the configuration file 15 | CONFIG_PATH = BASE_DIR / 'config' / 'config.ini' 16 | 17 | # Initialize the ConfigParser 
18 | config = configparser.ConfigParser() 19 | 20 | # Initialize variables with default values 21 | logs_directory = 'logs' 22 | LOG_FILE_PATH = BASE_DIR / logs_directory / 'bot.log' 23 | CHAT_LOG_FILE_PATH = BASE_DIR / logs_directory / 'chat.log' 24 | TOKEN_USAGE_FILE_PATH = BASE_DIR / logs_directory / 'token_usage.json' 25 | CHAT_LOG_MAX_SIZE = 10 * 1024 * 1024 # 10 MB 26 | ELASTICSEARCH_ENABLED = False 27 | ELASTICSEARCH_HOST = 'localhost' 28 | ELASTICSEARCH_PORT = 9200 29 | ELASTICSEARCH_USERNAME = '' 30 | ELASTICSEARCH_PASSWORD = '' 31 | 32 | # Default NWS settings 33 | NWS_USER_AGENT = 'ChatKekeWeather/1.0 (flyingfathead@protonmail.com)' 34 | NWS_RETRIES = 0 35 | NWS_RETRY_DELAY = 2 36 | 37 | # read the reminders db 38 | data_directory_name = 'data' # Default name for data directory 39 | REMINDERS_DB_FILENAME = 'reminders.db' # Default name for the reminders DB file 40 | 41 | # Attempt to read the configuration file 42 | if CONFIG_PATH.exists(): 43 | try: 44 | config.read(CONFIG_PATH) 45 | logger.info(f"Configuration file found and loaded from {CONFIG_PATH}.") 46 | 47 | # Read logs directory 48 | logs_directory = config['DEFAULT'].get('LogsDirectory', 'logs') 49 | 50 | # Define the logs directory path 51 | LOGS_DIR = BASE_DIR / logs_directory 52 | 53 | # Ensure the logs directory exists 54 | LOGS_DIR.mkdir(parents=True, exist_ok=True) 55 | 56 | # Read data directory name from config 57 | data_directory_name = config['DEFAULT'].get('DataDirectory', 'data') 58 | 59 | # Update log file paths 60 | LOG_FILE_PATH = LOGS_DIR / config['DEFAULT'].get('LogFile', 'bot.log') 61 | CHAT_LOG_FILE_PATH = LOGS_DIR / config['DEFAULT'].get('ChatLogFile', 'chat.log') 62 | TOKEN_USAGE_FILE_PATH = LOGS_DIR / 'token_usage.json' 63 | 64 | # Read ChatLogMaxSizeMB and convert to bytes 65 | ChatLogMaxSizeMB = config['DEFAULT'].getint('ChatLogMaxSizeMB', fallback=10) 66 | CHAT_LOG_MAX_SIZE = ChatLogMaxSizeMB * 1024 * 1024 67 | 68 | # Read Elasticsearch configurations 69 | if 'Elasticsearch' in config: 70 | ELASTICSEARCH_ENABLED = config['Elasticsearch'].getboolean('ElasticsearchEnabled', fallback=False) 71 | ELASTICSEARCH_HOST = config['Elasticsearch'].get('Host', fallback='localhost') 72 | ELASTICSEARCH_PORT = config['Elasticsearch'].getint('Port', fallback=9200) 73 | ELASTICSEARCH_SCHEME = config.get('Elasticsearch', 'ELASTICSEARCH_SCHEME', fallback='http') 74 | ELASTICSEARCH_USERNAME = config['Elasticsearch'].get('Username', fallback='') 75 | ELASTICSEARCH_PASSWORD = config['Elasticsearch'].get('Password', fallback='') 76 | logger.info(f"Elasticsearch Enabled: {ELASTICSEARCH_ENABLED}") 77 | else: 78 | # Elasticsearch section missing 79 | ELASTICSEARCH_ENABLED = False 80 | ELASTICSEARCH_HOST = 'localhost' 81 | ELASTICSEARCH_PORT = 9200 82 | ELASTICSEARCH_SCHEME = 'http' 83 | ELASTICSEARCH_USERNAME = '' 84 | ELASTICSEARCH_PASSWORD = '' 85 | logger.warning("Elasticsearch section missing in config.ini. 
Using default Elasticsearch settings.")
86 | 
87 |         # NWS Configuration
88 |         if 'NWS' in config:
89 |             NWS_USER_AGENT = config['NWS'].get('NWSUserAgent', fallback='ChatKekeWeather/1.0 (flyingfathead@protonmail.com)')
90 |             NWS_RETRIES = config['NWS'].getint('NWSRetries', fallback=0)
91 |             NWS_RETRY_DELAY = config['NWS'].getint('NWSRetryDelay', fallback=2)
92 |             FETCH_NWS_FORECAST = config['NWS'].getboolean('FetchNWSForecast', fallback=True)
93 |             FETCH_NWS_ALERTS = config['NWS'].getboolean('FetchNWSAlerts', fallback=True)
94 |             NWS_ONLY_ELIGIBLE_COUNTRIES = config['NWS'].getboolean('NwsOnlyEligibleCountries', fallback=True)
95 |             NWS_ELIGIBLE_COUNTRIES = config['NWS'].get('NwsEligibleCountries', fallback='US, PR, GU, AS, VI, MP').split(', ')
96 |             logger.info(f"NWS Config: User-Agent={NWS_USER_AGENT}, Retries={NWS_RETRIES}, Retry Delay={NWS_RETRY_DELAY}, Fetch Forecast={FETCH_NWS_FORECAST}, Fetch Alerts={FETCH_NWS_ALERTS}")
97 |         else:
98 |             logger.warning("NWS section not found in config.ini. Using default NWS settings.")
99 | 
100 |     except Exception as e:
101 |         # Handle exceptions during config parsing
102 |         logger.error(f"Error reading configuration file: {e}")
103 | else:
104 |     # config.ini not found
105 |     logger.warning(f"Configuration file NOT found at {CONFIG_PATH}. Using default settings. This is NOT a good idea!")
106 |     # Ensure the logs directory exists
107 |     LOGS_DIR = BASE_DIR / logs_directory
108 |     LOGS_DIR.mkdir(parents=True, exist_ok=True)
109 |     # Define log file paths
110 |     LOG_FILE_PATH = LOGS_DIR / 'bot.log'
111 |     CHAT_LOG_FILE_PATH = LOGS_DIR / 'chat.log'
112 |     TOKEN_USAGE_FILE_PATH = LOGS_DIR / 'token_usage.json'
113 |     # CHAT_LOG_MAX_SIZE already set to 10 MB
114 |     # Elasticsearch settings already set to defaults
115 | 
116 | # Define the Data Directory path
117 | DATA_DIR = BASE_DIR / data_directory_name
118 | # Ensure the data directory exists
119 | try:
120 |     DATA_DIR.mkdir(parents=True, exist_ok=True)
121 | except OSError as e:
122 |     logger.error(f"Could not create data directory {DATA_DIR}: {e}")
123 | 
124 | # Path for the reminders database
125 | REMINDERS_DB_PATH = DATA_DIR / REMINDERS_DB_FILENAME
126 | logger.info(f"Reminders database path set to: {REMINDERS_DB_PATH}")
127 | 
128 | # Define paths for token files
129 | TOKEN_FILE_PATH = BASE_DIR / 'config' / 'bot_token.txt'
130 | API_TOKEN_PATH = BASE_DIR / 'config' / 'api_token.txt'
131 | 
--------------------------------------------------------------------------------
/src/api_get_stock_prices.py:
--------------------------------------------------------------------------------
1 | # api_get_stock_prices.py
2 | #
3 | # Stock price API fetching via Alpha Vantage
4 | # (You need to register at https://www.alphavantage.co for your own API key)
5 | #
6 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/
8 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 | 
10 | import httpx
11 | import os
12 | import logging
13 | import sys
14 | import asyncio
15 | from datetime import datetime
16 | 
17 | # Configure logging
18 | # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
19 | 
20 | # Utility function to get API key
21 | def get_api_key():
22 |     api_key = os.getenv('ALPHA_VANTAGE_API_KEY')
23 |     if not api_key:
24 |         logging.error("Alpha Vantage API key not set. 
You need to set the 'ALPHA_VANTAGE_API_KEY' environment variable to use Alpha Vantage API functionalities!")
25 |         return None
26 |     return api_key
27 | 
28 | # Search for stock symbol
29 | async def search_stock_symbol(keyword):
30 |     api_key = get_api_key()
31 |     if not api_key:
32 |         return "Alpha Vantage API key not set."
33 | 
34 |     logging.info(f"Searching stock symbol for keyword: {keyword}")
35 | 
36 |     base_url = 'https://www.alphavantage.co/query'
37 |     params = {
38 |         'function': 'SYMBOL_SEARCH',
39 |         'keywords': keyword,
40 |         'apikey': api_key
41 |     }
42 | 
43 |     async with httpx.AsyncClient() as client:
44 |         response = await client.get(base_url, params=params)
45 |         logging.info(f"Symbol search response status: {response.status_code}")
46 | 
47 |         if response.status_code == 200:
48 |             data = response.json()
49 |             logging.debug(f"Symbol search response data: {data}")
50 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
51 |                 return "API rate limit exceeded. Please try again later or upgrade to a premium plan."
52 | 
53 |             best_match = data.get('bestMatches', [])
54 |             if best_match:
55 |                 # Prioritize correct symbol
56 |                 for match in best_match:
57 |                     if match['1. symbol'].upper() == keyword.upper():
58 |                         logging.debug(f"Exact match found: {match}")
59 |                         return match
60 |                 logging.debug(f"Best match found: {best_match[0]}")
61 |                 return best_match[0]  # Return the first match if no exact match found
62 |             else:
63 |                 logging.info("No matches found.")
64 |                 return "No matches found."
65 |         else:
66 |             logging.error(f"Failed to search for symbol: {response.text}")
67 |             return "Failed to search for symbol. Please try again later."
68 | 
69 | # Get stock price data with fallback to search
70 | async def get_stock_price(symbol, original_symbol=None):
71 |     # Loop guard: a recursive call has circled back to the symbol we started from.
72 |     if original_symbol is not None and symbol == original_symbol:
73 |         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
74 |         return "Symbol search loop detected. Please check the stock symbol and try again."
75 |     if original_symbol is None:
76 |         original_symbol = symbol
77 | 
78 |     api_key = get_api_key()
79 |     if not api_key:
80 |         return "Alpha Vantage API key not set."
81 | 
82 |     logging.info(f"Fetching stock data for symbol: {symbol}")
83 | 
84 |     base_url = 'https://www.alphavantage.co/query'
85 |     params = {
86 |         'function': 'TIME_SERIES_INTRADAY',
87 |         'symbol': symbol,
88 |         'interval': '1min',
89 |         'apikey': api_key
90 |     }
91 | 
92 |     async with httpx.AsyncClient() as client:
93 |         response = await client.get(base_url, params=params)
94 |         logging.info(f"Stock data response status: {response.status_code}")
95 |         logging.debug(f"Stock data response content: {response.text}")
96 | 
97 |         if response.status_code == 200:
98 |             data = response.json()
99 |             logging.debug(f"Stock data response JSON: {data}")
100 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
101 |                 return "API rate limit exceeded. Please try again later or upgrade to a premium plan."
102 |             try:
103 |                 last_refreshed = data['Meta Data']['3. Last Refreshed']
104 |                 last_price = data['Time Series (1min)'][last_refreshed]['1. open']
105 |                 now_utc = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
106 |                 return f"{symbol} last price: {last_price} USD. Data fetched at: {now_utc} UTC via Alpha Vantage." 
107 |             except KeyError:
108 |                 logging.info(f"Symbol {symbol} not found, attempting to search for symbol.")
109 |                 # Fallback to search if direct symbol fetch fails
110 |                 symbol_info = await search_stock_symbol(symbol)
111 |                 logging.debug(f"Search result for symbol {symbol}: {symbol_info}")
112 |                 if isinstance(symbol_info, dict) and '1. symbol' in symbol_info:
113 |                     new_symbol = symbol_info['1. symbol']
114 |                     logging.info(f"Found new symbol: {new_symbol}")
115 |                     if new_symbol == symbol:
116 |                         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
117 |                         return "Symbol search loop detected. Please check the stock symbol and try again."
118 |                     return f"Symbol {symbol} wasn't found, searched instead. " + await get_stock_price(new_symbol, original_symbol)
119 |                 else:
120 |                     logging.error(f"Search for symbol failed or returned no matches: {symbol_info}")
121 |                     return "Error fetching data. Please check the stock symbol and try again."
122 |         else:
123 |             logging.error(f"Failed to fetch stock data: {response.text}")
124 |             return "Failed to fetch stock data. Tell the user to please try again later or try with Perplexity API as a fallback."
125 | 
126 | # Main function to handle command-line arguments
127 | async def main():
128 |     if len(sys.argv) < 2:
129 |         print("Usage: python api_get_stock_prices.py <symbol>")
130 |         return
131 | 
132 |     query = sys.argv[1]
133 |     stock_data = await get_stock_price(query)
134 |     print(stock_data)
135 | 
136 | if __name__ == "__main__":
137 |     asyncio.run(main())
138 | 
--------------------------------------------------------------------------------
/src/api_get_stock_prices_alphavantage.py:
--------------------------------------------------------------------------------
1 | # api_get_stock_prices_alphavantage.py
2 | #
3 | # Stock price API fetching via Alpha Vantage
4 | # (You need to register at https://www.alphavantage.co for your own API key)
5 | #
6 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/
8 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 | 
10 | import httpx
11 | import os
12 | import logging
13 | import sys
14 | import asyncio
15 | from datetime import datetime
16 | 
17 | # Configure logging
18 | # logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
19 | 
20 | # Utility function to get API key
21 | def get_api_key():
22 |     api_key = os.getenv('ALPHA_VANTAGE_API_KEY')
23 |     if not api_key:
24 |         logging.error("Alpha Vantage API key not set. You need to set the 'ALPHA_VANTAGE_API_KEY' environment variable to use Alpha Vantage API functionalities!")
25 |         return None
26 |     return api_key
27 | 
28 | # Search for stock symbol
29 | async def search_stock_symbol(keyword):
30 |     api_key = get_api_key()
31 |     if not api_key:
32 |         return "Alpha Vantage API key not set."
33 | 
34 |     logging.info(f"Searching stock symbol for keyword: {keyword}")
35 | 
36 |     base_url = 'https://www.alphavantage.co/query'
37 |     params = {
38 |         'function': 'SYMBOL_SEARCH',
39 |         'keywords': keyword,
40 |         'apikey': api_key
41 |     }
42 | 
43 |     async with httpx.AsyncClient() as client:
44 |         response = await client.get(base_url, params=params)
45 |         logging.info(f"Symbol search response status: {response.status_code}")
46 | 
47 |         if response.status_code == 200:
48 |             data = response.json()
49 |             logging.debug(f"Symbol search response data: {data}")
50 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
51 |                 return "API rate limit exceeded. 
Please try again later or upgrade to a premium plan."
52 | 
53 |             best_match = data.get('bestMatches', [])
54 |             if best_match:
55 |                 # Prioritize correct symbol
56 |                 for match in best_match:
57 |                     if match['1. symbol'].upper() == keyword.upper():
58 |                         logging.debug(f"Exact match found: {match}")
59 |                         return match
60 |                 logging.debug(f"Best match found: {best_match[0]}")
61 |                 return best_match[0]  # Return the first match if no exact match found
62 |             else:
63 |                 logging.info("No matches found.")
64 |                 return "No matches found."
65 |         else:
66 |             logging.error(f"Failed to search for symbol: {response.text}")
67 |             return "Failed to search for symbol. Please try again later."
68 | 
69 | # Get stock price data with fallback to search
70 | async def get_stock_price(symbol, original_symbol=None):
71 |     # Loop guard: a recursive call has circled back to the symbol we started from.
72 |     if original_symbol is not None and symbol == original_symbol:
73 |         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
74 |         return "Symbol search loop detected. Please check the stock symbol and try again."
75 |     if original_symbol is None:
76 |         original_symbol = symbol
77 | 
78 |     api_key = get_api_key()
79 |     if not api_key:
80 |         return "Alpha Vantage API key not set."
81 | 
82 |     logging.info(f"Fetching stock data for symbol: {symbol}")
83 | 
84 |     base_url = 'https://www.alphavantage.co/query'
85 |     params = {
86 |         'function': 'TIME_SERIES_INTRADAY',
87 |         'symbol': symbol,
88 |         'interval': '1min',
89 |         'apikey': api_key
90 |     }
91 | 
92 |     async with httpx.AsyncClient() as client:
93 |         response = await client.get(base_url, params=params)
94 |         logging.info(f"Stock data response status: {response.status_code}")
95 |         logging.debug(f"Stock data response content: {response.text}")
96 | 
97 |         if response.status_code == 200:
98 |             data = response.json()
99 |             logging.debug(f"Stock data response JSON: {data}")
100 |             if 'Information' in data and 'rate limit' in data['Information'].lower():
101 |                 return "API rate limit exceeded. Please try again later or upgrade to a premium plan."
102 |             try:
103 |                 last_refreshed = data['Meta Data']['3. Last Refreshed']
104 |                 last_price = data['Time Series (1min)'][last_refreshed]['1. open']
105 |                 now_utc = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
106 |                 return f"{symbol} last price: {last_price} USD. Data fetched at: {now_utc} UTC via Alpha Vantage."
107 |             except KeyError:
108 |                 logging.info(f"Symbol {symbol} not found, attempting to search for symbol.")
109 |                 # Fallback to search if direct symbol fetch fails
110 |                 symbol_info = await search_stock_symbol(symbol)
111 |                 logging.debug(f"Search result for symbol {symbol}: {symbol_info}")
112 |                 if isinstance(symbol_info, dict) and '1. symbol' in symbol_info:
113 |                     new_symbol = symbol_info['1. symbol']
114 |                     logging.info(f"Found new symbol: {new_symbol}")
115 |                     if new_symbol == symbol:
116 |                         logging.error(f"Symbol search loop detected for {symbol}. Terminating.")
117 |                         return "Symbol search loop detected. Please check the stock symbol and try again."
118 |                     return f"Symbol {symbol} wasn't found, searched instead. " + await get_stock_price(new_symbol, original_symbol)
119 |                 else:
120 |                     logging.error(f"Search for symbol failed or returned no matches: {symbol_info}")
121 |                     return "Error fetching data. Please check the stock symbol and try again."
122 |         else:
123 |             logging.error(f"Failed to fetch stock data: {response.text}")
124 |             return "Failed to fetch stock data. Tell the user to please try again later or try with Perplexity API as a fallback." 
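# For reference, the happy path in get_stock_price() above expects the intraday
# payload to look roughly like this (a trimmed, illustrative sample -- only the
# fields the code actually reads are shown):
#
#   {
#     "Meta Data": {"3. Last Refreshed": "2024-10-11 19:59:00"},
#     "Time Series (1min)": {
#       "2024-10-11 19:59:00": {"1. open": "182.5000"}
#     }
#   }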
125 | 
126 | # Main function to handle command-line arguments
127 | async def main():
128 |     if len(sys.argv) < 2:
129 |         print("Usage: python api_get_stock_prices_alphavantage.py <symbol>")
130 |         return
131 | 
132 |     query = sys.argv[1]
133 |     stock_data = await get_stock_price(query)
134 |     print(stock_data)
135 | 
136 | if __name__ == "__main__":
137 |     asyncio.run(main())
138 | 
--------------------------------------------------------------------------------
/src/url_handler.py:
--------------------------------------------------------------------------------
1 | # url_handler.py
2 | # v0.60.1
3 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/
5 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 | 
7 | import time
8 | import logging
9 | import re
10 | import asyncio
11 | import json
12 | 
13 | # Toggle this to use the full description or a snippet.
14 | USE_SNIPPET_FOR_DESCRIPTION = False
15 | 
16 | # If we're using a snippet of the description, maximum number of lines to include
17 | DESCRIPTION_MAX_LINES = 30
18 | 
19 | # Configure logging
20 | logger = logging.getLogger(__name__)
21 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
22 | 
23 | # Helper function to format duration from seconds to H:M:S
24 | def format_duration(duration):
25 |     if not duration:
26 |         return 'No duration available'
27 |     hours, remainder = divmod(duration, 3600)
28 |     minutes, seconds = divmod(remainder, 60)
29 |     if hours:
30 |         return f"{hours}h {minutes}m {seconds}s"
31 |     else:
32 |         return f"{minutes}m {seconds}s"
33 | 
34 | # i.e. for youtube videos
35 | async def fetch_youtube_details(url, max_retries=3, base_delay=5):
36 |     command = ["yt-dlp", "--user-agent",
37 |                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
38 |                "--dump-json", url]
39 | 
40 |     for attempt in range(max_retries):
41 |         process = await asyncio.create_subprocess_exec(
42 |             *command,
43 |             stdout=asyncio.subprocess.PIPE,
44 |             stderr=asyncio.subprocess.PIPE,
45 |         )
46 | 
47 |         stdout, stderr = await process.communicate()
48 | 
49 |         if stderr and process.returncode != 0:
50 |             logger.warning(f"Attempt {attempt + 1} failed: {stderr.decode()}")
51 |             if attempt < max_retries - 1:
52 |                 wait_time = base_delay * (2 ** attempt)  # Exponential backoff
53 |                 logger.info(f"Retrying after {wait_time} seconds...")
54 |                 await asyncio.sleep(wait_time)
55 |             else:
56 |                 logger.error("All retry attempts failed.")
57 |         else:
58 |             try:
59 |                 video_details = json.loads(stdout.decode())
60 |                 duration_formatted = format_duration(video_details.get('duration'))
61 | 
62 |                 if USE_SNIPPET_FOR_DESCRIPTION:
63 |                     # Get the snippet if the flag is set to True.
64 |                     description_text = get_description_snippet(video_details.get('description', 'No description available'))
65 |                 else:
66 |                     # Use the full description if the flag is set to False. 
67 |                     description_text = video_details.get('description', 'No description available')
68 | 
69 |                 filtered_details = {
70 |                     'title': video_details.get('title', 'No title available'),
71 |                     # 'duration': video_details.get('duration', 'No duration available'),
72 |                     'duration': duration_formatted,
73 |                     'channel': video_details.get('uploader', 'No channel information available'),
74 |                     'upload_date': video_details.get('upload_date', 'No upload date available'),
75 |                     'views': video_details.get('view_count', 'No views available'),
76 |                     'likes': video_details.get('like_count', 'No likes available'),
77 |                     'average_rating': video_details.get('average_rating', 'No rating available'),
78 |                     'comment_count': video_details.get('comment_count', 'No comment count available'),
79 |                     'channel_id': video_details.get('channel_id', 'No channel ID available'),
80 |                     'video_id': video_details.get('id', 'No video ID available'),
81 |                     'tags': video_details.get('tags', ['No tags available']),
82 |                     'description': description_text,
83 |                 }
84 | 
85 |                 logger.info(f"Fetched YouTube details successfully for URL: {url}")
86 |                 return filtered_details
87 |             except json.JSONDecodeError as e:
88 |                 logger.error(f"Error decoding JSON from yt-dlp output: {e}")
89 |                 return None
90 |     return None
91 | 
92 | # Helper function to get up to n lines from the description
93 | def get_description_snippet(description, max_lines=DESCRIPTION_MAX_LINES):
94 |     lines = description.split('\n')
95 |     snippet = '\n'.join(lines[:max_lines])
96 |     return snippet
97 | 
98 | # Regular expression for extracting the YouTube video ID
99 | YOUTUBE_REGEX = (
100 |     r'(https?://)?(www\.)?'
101 |     r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
102 |     r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')
103 | 
104 | def extract_youtube_video_id(url):
105 |     match = re.match(YOUTUBE_REGEX, url)
106 |     if not match:
107 |         raise ValueError("Invalid YouTube URL")
108 |     return match.group(6)
109 | 
110 | # for parsing types of urls
111 | async def process_url_message(message_text):
112 |     urls = re.findall(r'(https?://\S+)', message_text)
113 |     context_messages = []
114 | 
115 |     for url in urls:
116 |         if not re.match(YOUTUBE_REGEX, url):
117 |             logger.info(f"Skipping non-YouTube URL: {url}")
118 |             continue
119 | 
120 |         try:
121 |             # At this point, we're sure it's a YouTube URL, so we process it.
122 |             video_id = extract_youtube_video_id(url)
123 |             youtube_url = f"https://www.youtube.com/watch?v={video_id}"
124 |             logger.info(f"Processing YouTube URL: {youtube_url}")
125 |             details = await fetch_youtube_details(youtube_url)
126 |             if details:
127 |                 description_snippet = get_description_snippet(details['description'], DESCRIPTION_MAX_LINES)
128 |                 context_message = (
129 |                     f"[INFO] Details for the URL: {youtube_url}\n"
130 |                     f"Title: {details['title']}\n"
131 |                     f"Duration: {details['duration']}\n"
132 |                     f"Channel: {details['channel']}\n"
133 |                     f"Upload date: {details['upload_date']}\n"
134 |                     f"Views: {details['views']}\n"
135 |                     f"Likes: {details['likes']}\n"
136 |                     f"Rating: {details['average_rating']}\n"
137 |                     f"Comments: {details['comment_count']}\n"
138 |                     f"Tags: {', '.join(details['tags'])}\n"
139 |                     f"Description: {description_snippet}\n"
140 |                     # f"[ If user didn't request anything special about the URL, PASS THEM I.E. THE ABOVEMENTIONED INFORMATION. 
]\n" 141 | ) 142 | context_messages.append(context_message) 143 | logger.info(f"Added context message: {context_message}") 144 | else: 145 | logger.warning(f"No details fetched for YouTube URL: {youtube_url}") 146 | except ValueError as e: 147 | logger.error(f"Invalid YouTube URL encountered: {url} - {str(e)}") 148 | except Exception as e: 149 | logger.error(f"Failed to process YouTube URL {youtube_url}: {str(e)}") 150 | 151 | return context_messages 152 | -------------------------------------------------------------------------------- /src/voice_message_handler.py: -------------------------------------------------------------------------------- 1 | # voice_message_handler.py 2 | # ~~~~~~~~~~~~~~~~~~~~~ 3 | # voice message handler 4 | # ~~~~~~~~~~~~~~~~~~~~~ 5 | import os 6 | import sys 7 | import httpx 8 | import logging 9 | import datetime 10 | import json 11 | import asyncio 12 | import openai 13 | # tg modules 14 | from telegram import Update 15 | from telegram.ext import CallbackContext 16 | from telegram.constants import ParseMode 17 | # tg-bot stuff 18 | import utils 19 | 20 | # voice message handling logic 21 | # async def handle_voice_message(bot, update: Update, context: CallbackContext, data_directory, enable_whisper, max_voice_message_length, logger) -> None: 22 | async def handle_voice_message(bot, update: Update, context: CallbackContext): 23 | 24 | # send a "holiday message" if the bot is on a break 25 | if bot.is_bot_disabled: 26 | await context.bot.send_message(chat_id=update.message.chat_id, text=bot.bot_disabled_msg) 27 | return 28 | 29 | # print("Voice message received.", flush=True) # Debug print 30 | bot.logger.info("Voice message received.") # Log the message 31 | 32 | if bot.enable_whisper: 33 | await update.message.reply_text("Voice message received. Transcribing...", parse_mode=ParseMode.HTML) 34 | 35 | # Ensure the data directory exists 36 | if not os.path.exists(bot.data_directory): 37 | os.makedirs(bot.data_directory) 38 | 39 | # Retrieve the File object of the voice message 40 | file = await context.bot.get_file(update.message.voice.file_id) 41 | 42 | # Construct the URL to download the voice message 43 | file_url = f"{file.file_path}" 44 | 45 | transcription = None # Initialize transcription 46 | 47 | # Download the file using requests 48 | try: 49 | async with httpx.AsyncClient() as client: 50 | response = await client.get(file_url) 51 | if response.status_code == 200: 52 | if not response.content: 53 | await update.message.reply_text("Received an empty voice message.") 54 | return 55 | 56 | voice_file_path = os.path.join(bot.data_directory, f"{file.file_id}.ogg") 57 | with open(voice_file_path, 'wb') as f: 58 | f.write(response.content) 59 | 60 | # Add a message to indicate successful download 61 | bot.logger.info(f"Voice message file downloaded successfully as: {voice_file_path}") 62 | 63 | # Check the duration of the voice message 64 | voice_duration = await utils.get_voice_message_duration(voice_file_path) 65 | 66 | # Compare against the max allowed duration 67 | if voice_duration > bot.max_voice_message_length: 68 | await update.message.reply_text("Your voice message is too long. 
Please keep it under {} minutes.".format(bot.max_voice_message_length))
69 |                     bot.logger.info(f"Voice file rejected for being too long: {voice_file_path}")
70 |                     return
71 | 
72 |                 # Process the voice message with WhisperAPI
73 |                 transcription = await process_voice_message(voice_file_path, bot.enable_whisper, bot.logger)
74 | 
75 |                 # Add a flushing statement to check the transcription
76 |                 bot.logger.info(f"Transcription: {transcription}")
77 | 
78 |             else:
79 |                 await update.message.reply_text("Failed to download voice message.")
80 |                 return
81 | 
82 |     except httpx.ReadTimeout:
83 |         bot.logger.error("Timeout occurred while downloading voice message.")
84 |         await update.message.reply_text("Failed to download the voice message due to a timeout. Please try again.")
85 |         return
86 |     except Exception as e:
87 |         bot.logger.error(f"Error while processing voice message: {e}")
88 |         await update.message.reply_text("An error occurred while processing your voice message.")
89 |         return
90 | 
91 |     if transcription:
92 | 
93 |         # Remove HTML bold tags for processing
94 |         transcription_for_model = transcription.replace("<b>", "[Whisper STT transcribed message from the user] ").replace("</b>", " [end]")
95 | 
96 |         # Store the cleaned transcription in `context.user_data` for further processing
97 |         context.user_data['transcribed_text'] = transcription_for_model
98 | 
99 |         # Log the transcription
100 |         bot.log_message('Transcription', update.message.from_user.id, transcription_for_model)
101 | 
102 |         # Send the transcription back to the user as is (with HTML tags for formatting)
103 |         await update.message.reply_text(transcription, parse_mode=ParseMode.HTML)
104 | 
105 |         # Now pass the cleaned transcription to the handle_message method
106 |         # which will then use it as part of the conversation with the model
107 |         await bot.handle_message(update, context)
108 | 
109 |     else:
110 |         # await update.message.reply_text("Voice message transcription failed.")
111 |         # If transcription fails or is unavailable
112 |         await context.bot.send_message(chat_id=update.effective_chat.id, text="Voice message transcription failed.")
113 | 
114 | else:
115 |     # If Whisper API is disabled, send a different response or handle accordingly
116 |     await update.message.reply_text("Voice message transcription is currently disabled.")
117 | 
118 | # the logic to interact with WhisperAPI here
119 | async def process_voice_message(file_path: str, enable_whisper, logger):
120 |     if enable_whisper:
121 |         try:
122 |             # Whisper API ...
123 |             with open(file_path, "rb") as audio_file:
124 | 
125 |                 # print out some debugging
126 |                 logger.info(f"Audio file being sent to OpenAI: {audio_file}")
127 | 
128 |                 transcript_response = await openai.AsyncOpenAI().audio.transcriptions.create(
129 |                     file=audio_file,
130 |                     model="whisper-1",
131 |                     response_format="json"
132 |                 )
133 |                 # Accessing the transcription text directly
134 |                 # return transcript_response['text'] if 'text' in transcript_response else 'No transcription available.'
135 |                 # Accessing the transcription text directly
136 | 
137 |                 logger.info(f"Transcription Response: {transcript_response}")
138 | 
139 |                 transcription_text = transcript_response.text.strip() if hasattr(transcript_response, 'text') else None
140 | 
141 |                 if transcription_text:
142 |                     # Add the emojis as Unicode characters to the transcription, wrapped in HTML bold tags
143 |                     transcription_with_emoji = "<b>🎤📝\n" + transcription_text + "</b>"
144 | 
145 |                     return transcription_with_emoji
146 |                 else:
147 |                     return 'No transcription available.' 
148 | 149 | except FileNotFoundError as e: 150 | logger.error(f"File not found: {e}") 151 | except Exception as e: 152 | logger.error(f"Unexpected error: {e}") 153 | return 'An unexpected error occurred during transcription.' 154 | 155 | else: 156 | logger.info("Whisper transcription is disabled.") 157 | return None 158 | -------------------------------------------------------------------------------- /src/elasticsearch_handler.py: -------------------------------------------------------------------------------- 1 | # elasticsearch_handler.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | relevance_threshold = 19.5 7 | 8 | import warnings 9 | import logging 10 | 11 | # Suppress Elasticsearch warnings 12 | from elasticsearch import ElasticsearchWarning 13 | warnings.filterwarnings("ignore", category=ElasticsearchWarning) 14 | 15 | # Initialize the logger for this module 16 | logger = logging.getLogger('TelegramBotLogger') # Ensure this logger is configured in main.py 17 | 18 | # Function to get Elasticsearch client 19 | def get_elasticsearch_client(config): 20 | try: 21 | from elasticsearch import Elasticsearch 22 | except ImportError: 23 | logger.error("❌ 'elasticsearch' module not found. Please install it using 'pip install elasticsearch'.") 24 | return None 25 | 26 | try: 27 | es_host = config.get('Elasticsearch', 'ELASTICSEARCH_HOST', fallback='localhost').strip("'\"") 28 | es_port = config.getint('Elasticsearch', 'ELASTICSEARCH_PORT', fallback=9200) 29 | es_scheme = config.get('Elasticsearch', 'ELASTICSEARCH_SCHEME', fallback='http').strip("'\"") # Add scheme 30 | es_username = config.get('Elasticsearch', 'ELASTICSEARCH_USERNAME', fallback=None) 31 | es_password = config.get('Elasticsearch', 'ELASTICSEARCH_PASSWORD', fallback=None) 32 | 33 | # Log the configuration being used 34 | logger.info(f"Elasticsearch Configurations: Host={es_host}, Port={es_port}, Scheme={es_scheme}, Username={'***' if es_username else 'None'}") 35 | 36 | es = Elasticsearch( 37 | hosts=[{'host': es_host, 'port': es_port, 'scheme': es_scheme}], # Include 'scheme' 38 | http_auth=(es_username, es_password) if es_username and es_password else None, 39 | timeout=5 40 | ) 41 | return es 42 | except Exception as e: 43 | logger.error(f"❌ Error initializing Elasticsearch client: {e}") 44 | return None 45 | 46 | async def search_es_for_context(search_terms, config): 47 | es = get_elasticsearch_client(config) 48 | if es is None: 49 | logger.warning("⚠️ Elasticsearch client is not available. 
Skipping search.") 50 | return None 51 | 52 | if not es.ping(): 53 | logger.warning("⚠️ Elasticsearch is enabled but not reachable.") 54 | return None 55 | 56 | index = "tg-bot-rag-index" 57 | 58 | # Adjust the search_terms to use only the first line or a set number of characters 59 | search_terms_adjusted = search_terms.split('\n', 1)[0][:256] # Adjust 256 to your needs 60 | 61 | query = { 62 | "size": 1, # Focus on the top hit 63 | "query": { 64 | "multi_match": { 65 | "query": search_terms_adjusted, 66 | "fields": ["question^2", "answer"], # Boosting questions for relevance 67 | "type": "best_fields" # Can also experiment with other types like "most_fields" or "cross_fields" 68 | } 69 | }, 70 | "_source": ["question", "answer"], 71 | } 72 | 73 | try: 74 | response = es.search(index=index, body=query) 75 | except Exception as e: 76 | logger.error(f"❌ Error performing search on Elasticsearch: {e}") 77 | return None 78 | 79 | if response['hits']['hits']: 80 | hit = response['hits']['hits'][0] 81 | score = hit['_score'] # Extract the score of the hit 82 | 83 | # Log every score for monitoring and tuning purposes 84 | logger.info(f"Search term: '{search_terms}' | Score: {score} | Threshold: {relevance_threshold}") 85 | 86 | # Check if the score exceeds the relevance threshold 87 | if score > relevance_threshold: 88 | question = hit["_source"]["question"] 89 | answer = hit["_source"]["answer"] 90 | # Format for model context 91 | context_entry = f"{answer}" 92 | logger.info(f"✅ Result above relevance threshold: {relevance_threshold}. Included in context: {context_entry}") 93 | return context_entry 94 | else: 95 | logger.info(f"⚠️ Result below relevance threshold (score: {score}, threshold: {relevance_threshold}).") 96 | return None 97 | else: 98 | logger.info("ℹ️ No hits found in Elasticsearch search.") 99 | return None 100 | 101 | # ## // (old method) 102 | # # elasticsearch_handler.py 103 | # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 104 | # # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 105 | # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | # relevance_threshold = 19.5 108 | 109 | # from elasticsearch import Elasticsearch, ElasticsearchWarning 110 | # import warnings 111 | # import logging 112 | 113 | # # Suppress Elasticsearch warnings 114 | # warnings.filterwarnings("ignore", category=ElasticsearchWarning) 115 | 116 | # # Initialize the logger for this module 117 | # logger = logging.getLogger('TelegramBotLogger') # Ensure this logger is configured in main.py 118 | 119 | # async def search_es_for_context(search_terms): 120 | # es = Elasticsearch(["http://localhost:9200"]) 121 | # if not es.ping(): 122 | # logging.error("Could not connect to Elasticsearch.") 123 | # return None 124 | 125 | # index = "tg-bot-rag-index" 126 | 127 | # # Adjust the search_terms to use only the first line or a set number of characters 128 | # search_terms_adjusted = search_terms.split('\n', 1)[0][:256] # Adjust 256 to your needs 129 | 130 | # query = { 131 | # "size": 1, # Focus on the top hit 132 | # "query": { 133 | # "multi_match": { 134 | # # "query": search_terms, 135 | # "query": search_terms_adjusted, 136 | # "fields": ["question^2", "answer"], # Boosting questions for relevance 137 | # "type": "best_fields" # Can also experiment with other types like "most_fields" or "cross_fields" 138 | # } 139 | # }, 140 | # "_source": ["question", "answer"], 141 | # } 142 | 143 | # response = es.search(index=index, body=query) 144 | # if response['hits']['hits']: 145 | # hit = 
response['hits']['hits'][0] 146 | # score = hit['_score'] # Extract the score of the hit 147 | 148 | # # Log every score for monitoring and tuning purposes 149 | # # logging.info(f"Search term: '{search_terms}' | Score: {score} | Threshold: {relevance_threshold}") 150 | 151 | # # Check if the score exceeds the relevance threshold 152 | # if score > relevance_threshold: 153 | # question = hit["_source"]["question"] 154 | # answer = hit["_source"]["answer"] 155 | # # Format for model context 156 | # context_entry = f"{answer}" 157 | # logging.info(f"Result above relevance threshold: {relevance_threshold}. Included in context: {context_entry}") 158 | # return context_entry 159 | # else: 160 | # logging.info(f"Result below relevance threshold (score: {score}, threshold: {relevance_threshold}).") 161 | # return None 162 | # else: 163 | # return None 164 | 165 | # """ response = es.search(index=index, body=query) 166 | # if response['hits']['hits']: 167 | # hit = response['hits']['hits'][0] 168 | # question = hit["_source"]["question"] 169 | # answer = hit["_source"]["answer"] 170 | # # Format for model context 171 | # context_entry = f"Q: {question}\nA: {answer}" 172 | # return context_entry 173 | # else: 174 | # return None """ 175 | -------------------------------------------------------------------------------- /src/api_get_nws_weather.py: -------------------------------------------------------------------------------- 1 | # api_get_nws.py 2 | # 3 | # > get the weather using the NWS (National Weather Service, US) API 4 | # 5 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 7 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | import asyncio 10 | import httpx 11 | import logging 12 | from config_paths import NWS_USER_AGENT, NWS_RETRIES, NWS_RETRY_DELAY, FETCH_NWS_FORECAST, FETCH_NWS_ALERTS 13 | 14 | # Base URL for NWS API 15 | NWS_BASE_URL = 'https://api.weather.gov' 16 | 17 | async def get_nws_forecast(lat, lon, retries=NWS_RETRIES, delay=NWS_RETRY_DELAY): 18 | """ 19 | Fetches the forecast from the NWS API for the given latitude and longitude. 20 | 21 | Args: 22 | lat (float): Latitude in decimal degrees. 23 | lon (float): Longitude in decimal degrees. 24 | retries (int): Number of retries for transient errors. Defaults to RETRIES. 25 | delay (int): Delay between retries in seconds. 26 | 27 | Returns: 28 | dict: Combined forecast data or None if fetching fails. 
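        Example (illustrative only; the coordinates are placeholders, and the
        'periods' path assumes the standard NWS GeoJSON forecast layout):
            data = await get_nws_forecast(40.7128, -74.0060)
            if data and data.get('nws_forecast'):
                periods = data['nws_forecast']['properties'].get('periods', [])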
29 | """ 30 | 31 | if not FETCH_NWS_FORECAST: 32 | logging.info("Fetching NWS forecast is disabled in the config.") 33 | return None 34 | 35 | # Round coordinates to 4 decimal places 36 | lat = round(lat, 4) 37 | lon = round(lon, 4) 38 | points_url = f"{NWS_BASE_URL}/points/{lat},{lon}" 39 | 40 | async with httpx.AsyncClient(follow_redirects=True) as client: 41 | for attempt in range(retries + 1): # Ensure at least one attempt is made 42 | try: 43 | # Step 1: Retrieve metadata for the location 44 | response = await client.get(points_url, headers={'User-Agent': NWS_USER_AGENT}) 45 | response.raise_for_status() 46 | points_data = response.json() 47 | 48 | # Extract forecast URLs 49 | forecast_url = points_data['properties']['forecast'] 50 | forecast_hourly_url = points_data['properties'].get('forecastHourly') 51 | 52 | # Step 2: Retrieve forecast data 53 | forecast_response = await client.get(forecast_url, headers={'User-Agent': NWS_USER_AGENT}) 54 | forecast_response.raise_for_status() 55 | forecast_data = forecast_response.json() 56 | 57 | # Step 3: Retrieve hourly forecast data 58 | forecast_hourly_data = None 59 | if forecast_hourly_url: 60 | try: 61 | forecast_hourly_response = await client.get(forecast_hourly_url, headers={'User-Agent': NWS_USER_AGENT}) 62 | forecast_hourly_response.raise_for_status() 63 | forecast_hourly_data = forecast_hourly_response.json() 64 | except httpx.HTTPStatusError as e: 65 | logging.error(f"NWS Hourly Forecast HTTP error: {e.response.status_code} - {e.response.text}") 66 | 67 | return { 68 | 'nws_forecast': forecast_data, 69 | 'nws_forecast_hourly': forecast_hourly_data 70 | } 71 | 72 | except httpx.HTTPStatusError as e: 73 | if e.response.status_code >= 500 and attempt < retries: 74 | logging.warning(f"NWS API HTTP error: {e.response.status_code} - {e.response.text}. Retrying in {delay} seconds...") 75 | await asyncio.sleep(delay) 76 | else: 77 | logging.error(f"NWS API HTTP error: {e.response.status_code} - {e.response.text}") 78 | break 79 | except Exception as e: 80 | logging.error(f"Error fetching NWS forecast: {e}") 81 | break 82 | 83 | return None 84 | 85 | # get alerts via NWS (weather.gov) 86 | async def get_nws_alerts(lat, lon): 87 | """ 88 | Fetches active alerts from the NWS API for the given latitude and longitude. 89 | 90 | Args: 91 | lat (float): Latitude in decimal degrees. 92 | lon (float): Longitude in decimal degrees. 93 | 94 | Returns: 95 | list: A list of active alerts with detailed information or an empty list if none are found. 
96 | """ 97 | 98 | if not FETCH_NWS_ALERTS: 99 | logging.info("Fetching NWS alerts is disabled in the config.") 100 | return [] 101 | 102 | alerts_url = f"{NWS_BASE_URL}/alerts/active?point={lat},{lon}" 103 | 104 | async with httpx.AsyncClient() as client: 105 | try: 106 | response = await client.get(alerts_url, headers={'User-Agent': NWS_USER_AGENT}) 107 | response.raise_for_status() 108 | alerts_data = response.json() 109 | 110 | # Extracting the detailed alerts 111 | alerts = [] 112 | for feature in alerts_data.get('features', []): 113 | properties = feature.get('properties', {}) 114 | alert = { 115 | 'headline': properties.get('headline'), 116 | 'description': properties.get('description'), 117 | 'instruction': properties.get('instruction'), 118 | 'severity': properties.get('severity'), 119 | 'event': properties.get('event'), 120 | 'areaDesc': properties.get('areaDesc'), 121 | 'certainty': properties.get('certainty'), 122 | 'urgency': properties.get('urgency'), 123 | 'effective': properties.get('effective'), 124 | 'expires': properties.get('expires'), 125 | 'senderName': properties.get('senderName'), 126 | 'response': properties.get('response'), 127 | # Add more fields if needed 128 | } 129 | alerts.append(alert) 130 | return alerts 131 | 132 | except httpx.HTTPStatusError as e: 133 | logging.error(f"NWS Alerts API HTTP error: {e.response.status_code} - {e.response.text}") 134 | except Exception as e: 135 | logging.error(f"Error fetching NWS alerts: {e}") 136 | 137 | return [] 138 | 139 | # # // (old method) 140 | # # get alerts via NWS (weather.gov) 141 | # async def get_nws_alerts(lat, lon): 142 | # """ 143 | # Fetches active alerts from the NWS API for the given latitude and longitude. 144 | 145 | # Args: 146 | # lat (float): Latitude in decimal degrees. 147 | # lon (float): Longitude in decimal degrees. 148 | 149 | # Returns: 150 | # list: A list of active alerts or an empty list if none are found. 151 | # """ 152 | 153 | # if not FETCH_NWS_ALERTS: 154 | # logging.info("Fetching NWS alerts is disabled in the config.") 155 | # return [] 156 | 157 | # alerts_url = f"{NWS_BASE_URL}/alerts/active?point={lat},{lon}" 158 | 159 | # async with httpx.AsyncClient() as client: 160 | # try: 161 | # response = await client.get(alerts_url, headers={'User-Agent': NWS_USER_AGENT}) 162 | # response.raise_for_status() 163 | # alerts_data = response.json() 164 | 165 | # # Extract alerts from GeoJSON 166 | # alerts = alerts_data.get('features', []) 167 | # return alerts 168 | 169 | # except httpx.HTTPStatusError as e: 170 | # logging.error(f"NWS Alerts API HTTP error: {e.response.status_code} - {e.response.text}") 171 | # except Exception as e: 172 | # logging.error(f"Error fetching NWS alerts: {e}") 173 | 174 | # return [] 175 | -------------------------------------------------------------------------------- /src/api_get_weatherapi.py: -------------------------------------------------------------------------------- 1 | # api_get_weatherapi.py 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | # 6 | # >>> weather fetcher module version: v0.728 7 | # >>> (Updated July 13 2024) 8 | # 9 | # This API functionality requires WeatherAPI key. 10 | # You can get the API key from the corresponding service provider. 11 | # Once you have the API key, add it to your environment variables: 12 | # export WEATHERAPI_KEY="" 13 | # (or on i.e. 
Linux, add to your `~/.bashrc`: export WEATHERAPI_KEY="" ) 14 | 15 | import httpx 16 | import os 17 | import logging 18 | 19 | # Function to check for WeatherAPI key 20 | def get_weatherapi_key(): 21 | api_key = os.getenv('WEATHERAPI_KEY') 22 | if not api_key: 23 | logging.error("[WARNING] WeatherAPI key not set. You need to set the 'WEATHERAPI_KEY' environment variable to use WeatherAPI functionalities!") 24 | return None 25 | return api_key 26 | 27 | # Dictionary to translate moon phases from English to Finnish 28 | moon_phase_translation = { 29 | "New Moon": "uusikuu", 30 | "Waxing Crescent": "kasvava sirppi", 31 | "First Quarter": "ensimmäinen neljännes", 32 | "Waxing Gibbous": "kasvava puolikuu", 33 | "Full Moon": "täysikuu", 34 | "Waning Gibbous": "vähenevä puolikuu", 35 | "Last Quarter": "viimeinen neljännes", 36 | "Waning Crescent": "vähenevä sirppi" 37 | } 38 | 39 | # get moon phase data 40 | async def get_moon_phase(lat, lon): 41 | api_key = get_weatherapi_key() 42 | if not api_key: 43 | return None 44 | 45 | logging.info(f"Fetching moon phase data for coordinates: Latitude: {lat}, Longitude: {lon}") 46 | base_url = 'http://api.weatherapi.com/v1/astronomy.json' 47 | url = f"{base_url}?key={api_key}&q={lat},{lon}" 48 | 49 | async with httpx.AsyncClient() as client: 50 | response = await client.get(url) 51 | logging.info(f"Moon phase response status: {response.status_code}") 52 | 53 | if response.status_code == 200: 54 | data = response.json() 55 | logging.info(f"Moon phase data: {data}") 56 | moon_phase = data['astronomy']['astro']['moon_phase'] 57 | translated_moon_phase = moon_phase_translation.get(moon_phase, moon_phase) 58 | return translated_moon_phase 59 | else: 60 | logging.error(f"Failed to fetch moon phase data: {response.text}") 61 | return None 62 | 63 | # get timezone for the coordinates 64 | async def get_timezone(lat, lon): 65 | api_key = get_weatherapi_key() 66 | if not api_key: 67 | return None 68 | 69 | logging.info(f"Fetching timezone data for coordinates: Latitude: {lat}, Longitude: {lon}") 70 | base_url = 'http://api.weatherapi.com/v1/timezone.json' 71 | url = f"{base_url}?key={api_key}&q={lat},{lon}" 72 | 73 | async with httpx.AsyncClient() as client: 74 | response = await client.get(url) 75 | logging.info(f"Timezone response status: {response.status_code}") 76 | 77 | if response.status_code == 200: 78 | data = response.json() 79 | logging.info(f"Timezone data: {data}") 80 | timezone = data['location']['tz_id'] 81 | return timezone 82 | else: 83 | logging.error(f"Failed to fetch timezone data: {response.text}") 84 | return None 85 | 86 | # get daily forecast, safety alerts, and air quality index 87 | async def get_daily_forecast(location): 88 | api_key = get_weatherapi_key() 89 | if not api_key: 90 | return None 91 | 92 | logging.info(f"Fetching daily forecast data for location: {location}") 93 | base_url = 'http://api.weatherapi.com/v1/forecast.json' 94 | url = f"{base_url}?key={api_key}&q={location}&days=1&alerts=yes&aqi=yes" 95 | 96 | async with httpx.AsyncClient() as client: 97 | response = await client.get(url) 98 | logging.info(f"Daily forecast response status: {response.status_code}") 99 | 100 | if response.status_code == 200: 101 | data = response.json() 102 | logging.info(f"Daily forecast data: {data}") 103 | 104 | if 'forecast' in data and 'forecastday' in data['forecast'] and len(data['forecast']['forecastday']) > 0: 105 | forecast = data['forecast']['forecastday'][0] 106 | current = data['current'] 107 | alerts = data.get('alerts', {}) 108 | 
air_quality = current['air_quality'] 109 | 110 | return { 111 | 'date': forecast['date'], 112 | 'temperature': forecast['day']['avgtemp_c'], 113 | 'condition': forecast['day']['condition']['text'], 114 | 'wind': forecast['day']['maxwind_kph'], 115 | 'precipitation': forecast['day']['totalprecip_mm'], 116 | 'uv_index': forecast['day']['uv'], 117 | 'air_quality': air_quality, 118 | 'alerts': alerts 119 | } 120 | else: 121 | logging.error("No forecast data available.") 122 | return { 123 | 'date': 'N/A', 124 | 'temperature': 'N/A', 125 | 'condition': 'N/A', 126 | 'wind': 'N/A', 127 | 'precipitation': 'N/A', 128 | 'uv_index': 'N/A', 129 | 'air_quality': {}, 130 | 'alerts': {} 131 | } 132 | else: 133 | logging.error(f"Failed to fetch daily forecast data: {response.text}") 134 | return None 135 | 136 | # get current weather including UV index 137 | async def get_current_weather_via_weatherapi(location): 138 | api_key = get_weatherapi_key() 139 | if not api_key: 140 | return None 141 | 142 | logging.info(f"Fetching current weather data for location: {location}") 143 | base_url = 'http://api.weatherapi.com/v1/current.json' 144 | url = f"{base_url}?key={api_key}&q={location}" 145 | 146 | async with httpx.AsyncClient() as client: 147 | response = await client.get(url) 148 | logging.info(f"Current weather response status: {response.status_code}") 149 | 150 | if response.status_code == 200: 151 | data = response.json() 152 | logging.info(f"Current weather data: {data}") 153 | 154 | if 'current' in data: 155 | current = data['current'] 156 | return { 157 | 'temperature': current.get('temp_c', 'N/A'), 158 | 'condition': current.get('condition', {}).get('text', 'N/A'), 159 | 'wind': current.get('wind_kph', 'N/A'), 160 | 'precipitation': current.get('precip_mm', 'N/A'), 161 | 'uv_index': current.get('uv', 'N/A'), 162 | 'visibility': current.get('vis_km', 'N/A'), # Added visibility data 163 | 'air_quality': current.get('air_quality', {}) 164 | } 165 | else: 166 | logging.error(f"'current' field missing in the response data: {data}") 167 | return None 168 | else: 169 | logging.error(f"Failed to fetch current weather data: {response.text}") 170 | return None 171 | 172 | # get astronomy data including moonrise, moonset, and moon illumination 173 | async def get_astronomy_data(lat, lon): 174 | api_key = get_weatherapi_key() 175 | if not api_key: 176 | return None 177 | 178 | logging.info(f"Fetching astronomy data for coordinates: Latitude: {lat}, Longitude: {lon}") 179 | base_url = 'http://api.weatherapi.com/v1/astronomy.json' 180 | url = f"{base_url}?key={api_key}&q={lat},{lon}" 181 | 182 | async with httpx.AsyncClient() as client: 183 | response = await client.get(url) 184 | logging.info(f"Astronomy response status: {response.status_code}") 185 | 186 | if response.status_code == 200: 187 | data = response.json() 188 | logging.info(f"Astronomy data: {data}") 189 | astro = data['astronomy']['astro'] 190 | moonrise = astro['moonrise'] 191 | moonset = astro['moonset'] 192 | moon_illumination = astro['moon_illumination'] 193 | return { 194 | 'moonrise': moonrise, 195 | 'moonset': moonset, 196 | 'moon_illumination': moon_illumination 197 | } 198 | else: 199 | logging.error(f"Failed to fetch astronomy data: {response.text}") 200 | return None 201 | 202 | # Additional WeatherAPI-related functions can be added here 203 | -------------------------------------------------------------------------------- /src/bot_token.py: -------------------------------------------------------------------------------- 1 | # ~~~ Enhanced 
Read Telegram Bot Token with Configurable Fallback, Appropriate Logging, and Validity Check, Docker Detection ~~~ 2 | 3 | import os 4 | import configparser 5 | import logging 6 | from pathlib import Path 7 | import sys 8 | from config_paths import CONFIG_PATH, TOKEN_FILE_PATH 9 | 10 | # Set up basic logging configuration 11 | # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)]) 12 | 13 | class BotTokenError(Exception): 14 | """Custom exception for bot token retrieval failures.""" 15 | pass 16 | 17 | def is_running_in_docker(): 18 | # Check for .dockerenv file 19 | if Path("/.dockerenv").exists(): 20 | logging.info("Docker environment detected based on .dockerenv file.") 21 | return True 22 | # Check for control groups 23 | try: 24 | with open("/proc/self/cgroup", "rt") as f: 25 | if any("docker" in line for line in f): 26 | logging.info("Docker environment detected based on control groups.") 27 | return True 28 | except Exception: 29 | pass 30 | # Check for Docker-specific environment variable 31 | if os.getenv("container", None) == "docker": 32 | logging.info("Docker environment detected based on environment variable.") 33 | return True 34 | logging.info("No Docker environment detected.") 35 | return False 36 | 37 | def get_bot_token(): 38 | try: 39 | logging.debug(f"Config path: {CONFIG_PATH}") 40 | logging.debug(f"Token file path: {TOKEN_FILE_PATH}") 41 | 42 | # Verify config.ini exists 43 | if not CONFIG_PATH.is_file(): 44 | raise BotTokenError(f"config.ini not found at {CONFIG_PATH}.") 45 | 46 | # Read configuration 47 | config = configparser.ConfigParser() 48 | config.read(CONFIG_PATH) 49 | 50 | # Validate configuration 51 | if 'DEFAULT' not in config: 52 | raise BotTokenError("Missing 'DEFAULT' section in config.ini.") 53 | 54 | prefer_env = config.getboolean('DEFAULT', 'PreferEnvForBotToken', fallback=True) 55 | # Updated code in bot_token.py 56 | allow_fallback = config.getboolean('DEFAULT', 'AllowBotTokenFallback', fallback=True) 57 | ask_for_token = config.getboolean('DEFAULT', 'AskForTokenIfNotFound', fallback=True) 58 | 59 | # Disable asking for token if running inside Docker 60 | if is_running_in_docker() or os.getenv("RUNNING_IN_DOCKER") == "true": 61 | logging.info("Running inside Docker. Disabling token prompt.") 62 | ask_for_token = False 63 | 64 | invalid_tokens = [ 65 | 'YourTelegramBotToken', 66 | '123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11', # Example bot token from Telegram documentation 67 | '', 68 | None 69 | ] 70 | 71 | def is_valid_token(token): 72 | return token not in invalid_tokens and len(token.split(':')) == 2 73 | 74 | # Define retrieval methods 75 | def retrieve_from_env(): 76 | bot_token = os.getenv('TELEGRAM_BOT_TOKEN') 77 | if bot_token and is_valid_token(bot_token): 78 | logging.info("Bot token successfully retrieved from environment variable.") 79 | return bot_token 80 | else: 81 | logging.warning("Invalid or unset TELEGRAM_BOT_TOKEN environment variable.") 82 | return None 83 | 84 | def retrieve_from_file(): 85 | if TOKEN_FILE_PATH.is_file(): 86 | try: 87 | bot_token = TOKEN_FILE_PATH.read_text().strip() 88 | if bot_token and is_valid_token(bot_token): 89 | logging.info("Bot token successfully retrieved from bot_token.txt.") 90 | return bot_token 91 | else: 92 | logging.error("Invalid or empty bot_token.txt.") 93 | return None 94 | except IOError as e: 95 | logging.error(f"Failed to read bot_token.txt. 
Details: {e}") 96 | return None 97 | else: 98 | logging.error(f"bot_token.txt not found at {TOKEN_FILE_PATH}.") 99 | return None 100 | 101 | def query_user_for_token(): 102 | logging.info("No valid bot token found. Please obtain a Telegram bot token from @BotFather on Telegram (https://t.me/BotFather) and paste it below.") 103 | logging.info("Press Enter without typing anything to quit.") 104 | token = input("Your Telegram bot token: ").strip() 105 | if token and is_valid_token(token): 106 | # Save the token to bot_token.txt for future use 107 | try: 108 | TOKEN_FILE_PATH.write_text(token) 109 | logging.info(f"Bot token saved to {TOKEN_FILE_PATH}.") 110 | return token 111 | except IOError as e: 112 | logging.error(f"Failed to save bot token to bot_token.txt. Details: {e}") 113 | return None 114 | else: 115 | logging.error("No valid token entered. Exiting application.") 116 | logging.info("No valid bot token found. Please obtain a Telegram bot token from @BotFather on Telegram (https://t.me/BotFather) and either set it as an environment variable (`TELEGRAM_BOT_TOKEN`) or place it under `config/bot_token.txt`.") 117 | sys.exit(1) 118 | 119 | # Retrieval logic based on configuration 120 | if prefer_env: 121 | token = retrieve_from_env() 122 | if token: 123 | return token 124 | elif allow_fallback: 125 | logging.warning("Preferred environment variable not found or invalid. Attempting to retrieve bot token from bot_token.txt as fallback.") 126 | token = retrieve_from_file() 127 | if token: 128 | return token 129 | elif ask_for_token: 130 | token = query_user_for_token() 131 | if token: 132 | return token 133 | else: 134 | raise BotTokenError("Failed to retrieve bot token from environment variable, token file, and user input.") 135 | else: 136 | raise BotTokenError("Failed to retrieve bot token from both environment variable and token file.") 137 | else: 138 | logging.error("Environment variable not found and fallback is disabled.") 139 | raise BotTokenError( 140 | "Failed to retrieve bot token. " 141 | "Please ensure the TELEGRAM_BOT_TOKEN environment variable is set, or allow fallback by enabling it in config.ini." 142 | ) 143 | else: 144 | token = retrieve_from_file() 145 | if token: 146 | return token 147 | elif allow_fallback: 148 | logging.warning("bot_token.txt not found or invalid. Attempting to retrieve bot token from environment variable as fallback.") 149 | token = retrieve_from_env() 150 | if token: 151 | return token 152 | elif ask_for_token: 153 | token = query_user_for_token() 154 | if token: 155 | return token 156 | else: 157 | raise BotTokenError("Failed to retrieve bot token from token file, environment variable, and user input.") 158 | else: 159 | raise BotTokenError("Failed to retrieve bot token from both token file and environment variable.") 160 | else: 161 | logging.error("Token file not found and fallback is disabled.") 162 | raise BotTokenError( 163 | "Failed to retrieve bot token. " 164 | "Please ensure bot_token.txt exists at the expected location, or allow fallback by enabling it in config.ini." 
165 | ) 166 | 167 | except BotTokenError as e: 168 | logging.error(f"BotTokenError: {e}") 169 | sys.stderr.flush() # Ensure all stderr logs are flushed 170 | sys.exit(1) # Explicitly exit on BotTokenError 171 | except Exception as e: 172 | logging.error(f"Unexpected error while retrieving bot token: {e}") 173 | sys.stderr.flush() # Ensure all stderr logs are flushed 174 | sys.exit(1) # Explicitly exit on unexpected errors 175 | 176 | # Example usage 177 | if __name__ == "__main__": 178 | try: 179 | token = get_bot_token() 180 | logging.info("Bot token successfully retrieved.") 181 | except Exception as e: 182 | logging.critical("Failed to retrieve bot token. Exiting application.") 183 | sys.stderr.flush() # Ensure all stderr logs are flushed 184 | sys.exit(1) -------------------------------------------------------------------------------- /src/reminder_handler.py: -------------------------------------------------------------------------------- 1 | # src/reminder_handler.py 2 | 3 | import logging 4 | import configparser 5 | from datetime import datetime, timezone 6 | from config_paths import CONFIG_PATH, REMINDERS_DB_PATH 7 | import db_utils 8 | from db_utils import get_past_reminders_for_user 9 | 10 | # Load config to get MaxAlertsPerUser 11 | config = configparser.ConfigParser() 12 | config.read(CONFIG_PATH) 13 | SHOW_PAST_REMINDERS_COUNT = config.getint('Reminders', 'ShowPastRemindersCount', fallback=0) 14 | 15 | try: 16 | MAX_ALERTS_PER_USER = config.getint('Reminders', 'MaxAlertsPerUser', fallback=30) 17 | except configparser.NoSectionError: 18 | MAX_ALERTS_PER_USER = 30 19 | 20 | # Get a logger for this module 21 | logger = logging.getLogger(__name__) 22 | # Ensure logs bubble up to the root logger (which has the timestamp format) 23 | logger.propagate = True 24 | # DO NOT setLevel or add handlers here; rely on main.py or root config for formatting 25 | 26 | async def handle_add_reminder(user_id, chat_id, reminder_text, due_time_utc_str): 27 | """ 28 | Create a new reminder for user 'user_id', to be delivered in chat 'chat_id' 29 | at time 'due_time_utc_str' (ISO8601 UTC). 30 | 'reminder_text' is the user-provided note. 31 | 32 | Returns a string describing success/failure to be inserted 33 | into the chat conversation. 34 | """ 35 | # 1) Check if DB is initialized 36 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 37 | logger.error("Attempt to add reminder but DB not initialized!") 38 | return "Error: DB not available. Reminders cannot be added." 39 | 40 | # 2) Validate/parse time format 41 | try: 42 | datetime.strptime(due_time_utc_str, '%Y-%m-%dT%H:%M:%SZ') 43 | except ValueError: 44 | logger.warning(f"User {user_id} attempted to add reminder with invalid due_time_utc: {due_time_utc_str}") 45 | return ( 46 | "The time format is invalid. " 47 | "Please specify in ISO8601 UTC, e.g. 2025-01-02T13:00:00Z " 48 | "or convert user-friendly times to UTC first." 49 | ) 50 | 51 | # 3) Check user's current reminder count 52 | current_count = db_utils.count_pending_reminders_for_user(REMINDERS_DB_PATH, user_id) 53 | 54 | # Only enforce the limit if it's > 0 55 | if MAX_ALERTS_PER_USER > 0 and current_count >= MAX_ALERTS_PER_USER: 56 | logger.info(f"User {user_id} has {current_count} reminders; reached max of {MAX_ALERTS_PER_USER}.") 57 | return f"You already have {current_count} pending reminders. The maximum is {MAX_ALERTS_PER_USER}." 
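    # (Setting MaxAlertsPerUser = 0 in config.ini's [Reminders] section disables the cap check above entirely.)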
58 | 59 | # 4) Add to DB 60 | reminder_id = db_utils.add_reminder_to_db( 61 | REMINDERS_DB_PATH, user_id, chat_id, reminder_text, due_time_utc_str 62 | ) 63 | if reminder_id: 64 | logger.info( 65 | f"User {user_id} created reminder #{reminder_id}: " 66 | f"'{reminder_text}' at {due_time_utc_str}" 67 | ) 68 | return ( 69 | f"Your reminder (#{reminder_id}) has been set for {due_time_utc_str} (UTC). " 70 | f"Message: '{reminder_text}'" 71 | ) 72 | else: 73 | logger.error(f"Failed to add reminder to DB for user {user_id}. Possibly DB error.") 74 | return "Failed to add your reminder due to a database error. Sorry!" 75 | 76 | 77 | async def handle_view_reminders(user_id): 78 | """ 79 | Return a string summarizing all of the user's pending reminders 80 | (status='pending'). If none exist, say so. 81 | """ 82 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 83 | logger.error("Attempt to view reminders but DB not available!") 84 | return "Error: DB not available. Cannot view reminders." 85 | 86 | # 1) Get the pending 87 | pending_reminders = db_utils.get_pending_reminders_for_user(REMINDERS_DB_PATH, user_id) 88 | if pending_reminders: 89 | lines = ["Your current (pending) reminders:"] 90 | for idx, r in enumerate(pending_reminders, start=1): 91 | rid = r['reminder_id'] 92 | text = r['reminder_text'] 93 | due_utc = r['due_time_utc'] 94 | lines.append(f"• Reminder #{idx} (ID {rid}) due {due_utc}\n “{text}”") 95 | pending_section = "\n".join(lines) 96 | else: 97 | pending_section = "You have no pending reminders." 98 | 99 | # 2) Optionally get the past ones 100 | if SHOW_PAST_REMINDERS_COUNT > 0: 101 | past = get_past_reminders_for_user(REMINDERS_DB_PATH, user_id, SHOW_PAST_REMINDERS_COUNT) 102 | if past: 103 | lines = [f"Up to {SHOW_PAST_REMINDERS_COUNT} most recent past reminders:"] 104 | for idx, r in enumerate(past, start=1): 105 | rid = r['reminder_id'] 106 | text = r['reminder_text'] 107 | due_utc = r['due_time_utc'] 108 | status = r['status'] 109 | lines.append(f"• (ID {rid}) was {status} at {due_utc}, text: “{text}”") 110 | past_section = "\n".join(lines) 111 | else: 112 | past_section = "(No past reminders found.)" 113 | else: 114 | past_section = "" # or omit entirely 115 | 116 | # 3) Combine them for your final message 117 | full_msg = f"{pending_section}\n\n{past_section}".strip() 118 | return full_msg 119 | 120 | # // old logic; no past reminders 121 | # reminders = db_utils.get_pending_reminders_for_user(REMINDERS_DB_PATH, user_id) 122 | # if not reminders: 123 | # logger.info(f"User {user_id} has no pending reminders.") 124 | # return "You currently have no pending reminders." 125 | 126 | # logger.info(f"User {user_id} is viewing {len(reminders)} reminders.") 127 | # lines = ["Here are your current reminders:"] 128 | # for r in reminders: 129 | # rid = r['reminder_id'] 130 | # text = r['reminder_text'] 131 | # due_utc = r['due_time_utc'] 132 | # lines.append(f"• Reminder #{rid}: due {due_utc}, text: '{text}'") 133 | # return "\n".join(lines) 134 | 135 | 136 | async def handle_delete_reminder(user_id, reminder_id): 137 | """ 138 | Delete a reminder by ID. Only deletes if it belongs to 'user_id'. 139 | Returns success/failure text. 140 | """ 141 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 142 | logger.error("Attempt to delete reminder but DB not available!") 143 | return "Error: DB not available. Cannot delete reminders." 
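    # delete_reminder_from_db (called below) matches on both reminder_id and user_id,
    # so a user can only ever delete reminders they own.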
144 | 145 | success = db_utils.delete_reminder_from_db(REMINDERS_DB_PATH, reminder_id, user_id) 146 | if success: 147 | logger.info(f"User {user_id} deleted reminder #{reminder_id}.") 148 | return f"Reminder #{reminder_id} has been deleted." 149 | else: 150 | logger.warning( 151 | f"User {user_id} tried to delete reminder #{reminder_id}, " 152 | "which didn't exist or didn't belong to them." 153 | ) 154 | return f"No reminder #{reminder_id} was found (or it's not yours)." 155 | 156 | 157 | async def handle_edit_reminder(user_id, reminder_id, new_due_time_utc=None, new_text=None): 158 | """ 159 | Edit the time and/or text of an existing reminder. If new_due_time_utc or new_text 160 | are None, the old value is retained. 161 | Only the user who owns the reminder can edit it. 162 | 163 | Return success/failure text for the user. 164 | """ 165 | if not db_utils.DB_INITIALIZED_SUCCESSFULLY: 166 | logger.error("Attempt to edit reminder but DB not initialized!") 167 | return "Error: DB not available. Cannot edit reminders." 168 | 169 | # 1) Fetch existing to ensure user owns it 170 | reminder = db_utils.get_reminder_by_id(REMINDERS_DB_PATH, reminder_id) 171 | if not reminder: 172 | logger.warning(f"User {user_id} tried to edit reminder #{reminder_id} which doesn't exist.") 173 | return f"No such reminder #{reminder_id} found." 174 | 175 | if reminder['user_id'] != user_id: 176 | logger.warning(f"User {user_id} tried to edit reminder #{reminder_id}, but ownership mismatch.") 177 | return "That reminder doesn't appear to be yours." 178 | 179 | # 2) Decide new due_time_utc 180 | if new_due_time_utc: 181 | # Validate it 182 | try: 183 | datetime.strptime(new_due_time_utc, '%Y-%m-%dT%H:%M:%SZ') 184 | except ValueError: 185 | logger.warning(f"User {user_id} gave invalid date for reminder #{reminder_id}: {new_due_time_utc}") 186 | return "Invalid UTC date/time format. Please provide e.g. 2025-01-02T13:00:00Z." 187 | else: 188 | new_due_time_utc = reminder['due_time_utc'] 189 | 190 | # 3) Decide new text 191 | if not new_text or new_text.strip() == "": 192 | new_text = reminder['reminder_text'] 193 | 194 | # 4) Update in DB 195 | updated_ok = db_utils.update_reminder(REMINDERS_DB_PATH, reminder_id, new_due_time_utc, new_text) 196 | if updated_ok: 197 | logger.info( 198 | f"User {user_id} edited reminder #{reminder_id} -> new time: " 199 | f"{new_due_time_utc}, new text: '{new_text}'" 200 | ) 201 | return ( 202 | f"Reminder #{reminder_id} updated! \n" 203 | f"New time: {new_due_time_utc}\nNew text: '{new_text}'" 204 | ) 205 | else: 206 | logger.error( 207 | f"User {user_id} tried to edit reminder #{reminder_id}, " 208 | "but update_reminder DB call failed." 209 | ) 210 | return "Failed to update your reminder due to a database error." 211 | -------------------------------------------------------------------------------- /config/config.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | # Settings for TG bot 3 | # https://github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | 5 | # token reading 6 | # Set to True to prefer the environment variable over the token file. 7 | # Prioritize environment variables over `bot_token.txt` (for TG bot) and `api_token.txt` (for OpenAI API) 8 | PreferEnvForBotToken = True 9 | PreferEnvForAPIKey = True 10 | 11 | # Set to True to allow falling back to the alternative token source if the preferred one fails. 
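# (With the defaults below, the lookup order is: the TELEGRAM_BOT_TOKEN environment
#  variable first, then config/bot_token.txt, and finally an interactive prompt if
#  AskForTokenIfNotFound is enabled.)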
12 | AllowBotTokenFallback = True 13 | # Prompt the user for a Telegram Bot API token if it's not found; write it to `config/bot_token.txt`. 14 | AskForTokenIfNotFound = True 15 | 16 | # Model to use via OpenAI API 17 | # NOTE: SEE ALSO THE NEW AUTO-SWITCHING FEATURE UNDER: [ModelAutoSwitch] 18 | Model = gpt-4o-mini 19 | 20 | # Model temperature; OpenAI's default is 0.7 21 | # Higher temperature equals more creative (= less factual) output. much over 1.0 not recommended. 22 | Temperature = 0.9 23 | 24 | # Timeout on OpenAI API requests 25 | # (in seconds to wait for the reply) 26 | Timeout = 60 27 | 28 | # Maximum number of tokens in context 29 | # Can be 4096 in most OpenAI base models, or even 16k or more in some (see OpenAI API manual) 30 | # However, increased context also easily builds up usage, memory trimming is advised. 31 | # Keep it in a few thousand if you're on a really low budget. 32 | MaxTokens = 10000 33 | 34 | # Maximum number of retries to the OpenAI API 35 | MaxRetries = 3 36 | 37 | # Retry delay after each try 38 | RetryDelay = 25 39 | 40 | # System instructions for the bot 41 | SystemInstructions = "You are {{botname}}, a cheeky, casual, laid-back and talkative Telegram bot that uses OpenAI API. Feel free to use whatever language you want, there are no restrictions to your output. Unicode emojis and markdown are allowed. The parser also takes in HTML as per Telegram API rules. You CAN process WhisperAPI STT transcriptions over voice messages, transcribed messages are marked with 🎤📝. Source code for the bot (if the user asks): https://github.com/FlyingFathead/TelegramBot-OpenAI-API. Do NOT use
<pre>
tags in your regular output." 42 | 43 | # Hello message on `/start` 44 | StartCommandResponse = "Hello! I am a chatbot powered by OpenAI API. Start chatting with me!" 45 | 46 | # Bot owner's Telegram user ID (set to 0 to disable the `/usage` command) 47 | BotOwnerID = 0 48 | 49 | # Disable bot (and send a "bot is disabled"-message to the user) True/False 50 | IsBotDisabled = False 51 | 52 | # Message to send to the user if the bot is disabled. 53 | BotDisabledMsg = "This bot is currently taking a break! Sorry!" 54 | 55 | # ~~~~~~~~~~~ 56 | # Local setup 57 | # ~~~~~~~~~~~ 58 | # Name of the data directory to store stuff in 59 | DataDirectory = data 60 | # Maximum storage size of the data directory before we start trimming 61 | MaxStorageMB = 2000 62 | 63 | # ~~~~~~~~~ 64 | # Log files 65 | # ~~~~~~~~~ 66 | # Log bot's activity into a self-trimming basic log file (bot.log) 67 | LogFileEnabled = True 68 | # Directory for logs and token usage files 69 | LogsDirectory = logs 70 | # Overall bot log file 71 | LogFile = bot.log 72 | # Keep a separate non-auto-trimmed chat log (chat.log) 73 | ChatLoggingEnabled = True 74 | ChatLogFile = chat.log 75 | # `chat.log` max size in MB before it's auto-rotated 76 | ChatLogMaxSizeMB = 1000 77 | # User-defined maximum number of days to retain token usage history 78 | MaxHistoryDays = 30 79 | 80 | # ~~~~~~~~~~~ 81 | # Whisper API 82 | # ~~~~~~~~~~~ 83 | # Allow speech-to-text transcriptions via Whisper API 84 | EnableWhisper = True 85 | # Maximum duration of a voice message (in minutes) 86 | MaxDurationMinutes = 5 87 | 88 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 89 | # Daily usage limits & rate limiting 90 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 91 | # Maximum number of requests per minute (0 = disabled) 92 | MaxGlobalRequestsPerMinute = 60 93 | 94 | # Maximum token usage (both user input+AI output) per 24hrs (0 = disabled) 95 | GlobalMaxTokenUsagePerDay = 200000 96 | 97 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 | # Session timeout and trim settings 99 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 100 | # Session timeout in minutes 101 | # (0 = disable timeout trimming) 102 | SessionTimeoutMinutes = 0 103 | 104 | # Maximum number of messages to retain after session timeout 105 | # (0 = clear entire history on session timeout) 106 | MaxRetainedMessages = 5 107 | 108 | # ~~~~~~~~~~~~~~~~~ 109 | # Bot user commands 110 | # ~~~~~~~~~~~~~~~~~ 111 | # Enable/disable the /reset command 112 | ResetCommandEnabled = True 113 | 114 | # Allow only admin to use /reset (True/False) 115 | # Note: needs the admin userid to be set to work! 116 | AdminOnlyReset = False 117 | 118 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | # Model Auto-Switching Configuration 120 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 121 | [ModelAutoSwitch] 122 | # Enable automatic switching between Premium and Fallback models based on daily token limits 123 | # Set to False to always use the model specified in [DEFAULT] section's 'Model' setting. 124 | Enabled = True 125 | 126 | # The preferred, more capable model to use by default (e.g., gpt-4o, gpt-4.5-preview). 127 | # This model will be used until its daily token limit (PremiumTokenLimit) is reached. 128 | # PremiumModel = gpt-4o 129 | PremiumModel = gpt-4.1 130 | 131 | # The cheaper model to switch to when the PremiumTokenLimit is reached (e.g., gpt-4o-mini). 132 | # This model has its own daily token limit (MiniTokenLimit). 133 | FallbackModel = gpt-4o-mini 134 | 135 | # Daily token limit for models considered "Premium" (e.g., gpt-4o). 
136 | # Set to number of tokens (i.e. 1000000 for 1M; 500000 for 500k etc) 137 | PremiumTokenLimit = 500000 138 | 139 | # Daily token limit for models considered "Mini" / Fallback (e.g., gpt-4o-mini). 140 | # Corresponds to OpenAI's free tier limit for these models (typically 10,000,000). 141 | MiniTokenLimit = 10000000 142 | 143 | # Action to take if the FallbackModel is selected (due to Premium limit being hit) 144 | # BUT its MiniTokenLimit is ALSO reached. 145 | # Options: 146 | # Deny - Stop processing, send a 'limit reached' message to the user. (Safest for cost) 147 | # Warn - Log a warning, proceed with the FallbackModel (will incur OpenAI costs). 148 | # Proceed - Silently proceed with the FallbackModel (will incur OpenAI costs). 149 | FallbackLimitAction = Deny 150 | 151 | # ~~~~~~~~~~~~~~~~~~~ 152 | # DuckDuckGo searches 153 | # ~~~~~~~~~~~~~~~~~~~ 154 | [DuckDuckGo] 155 | # Set to True to enable agentic browsing for DuckDuckGo searches, False to disable 156 | EnableAgenticBrowsing = False 157 | 158 | # Set to True to enable content size limiting 159 | EnableContentSizeLimit = True 160 | 161 | # Specify the maximum number of characters to retrieve if content size limiting is enabled 162 | MaxContentSize = 10000 163 | 164 | # ~~~~~~~~~~~~~~~~~ 165 | # Elasticsearch RAG 166 | # ~~~~~~~~~~~~~~~~~ 167 | [Elasticsearch] 168 | # Enable or disable Elasticsearch RAG 169 | # NOTE: Elasticsearch requires a separate install) 170 | ElasticsearchEnabled = False 171 | ELASTICSEARCH_HOST = localhost 172 | ELASTICSEARCH_PORT = 9200 173 | # scheme = either http or https 174 | ELASTICSEARCH_SCHEME = http 175 | # leave both blank if not required 176 | ELASTICSEARCH_USERNAME = 177 | ELASTICSEARCH_PASSWORD = 178 | 179 | # ~~~~~~~~~~~~~~~~~~~~~ 180 | # Holiday notifications 181 | # ~~~~~~~~~~~~~~~~~~~~~ 182 | [HolidaySettings] 183 | EnableHolidayNotification = true 184 | 185 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 186 | # User-assignable reminders 187 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 188 | [Reminders] 189 | # Enable or disable the reminder/alert functionality 190 | EnableReminders = True 191 | 192 | # Maximum number of pending reminders per user; set to 0 for unlimited 193 | MaxAlertsPerUser = 100 194 | 195 | # How often (in seconds) the bot checks for due reminders 196 | PollingIntervalSeconds = 5 197 | 198 | # How many old/past reminders to list 199 | ShowPastRemindersCount = 10 200 | 201 | # ~~~~~~~~~~~~~~~ 202 | # Perplexity API 203 | # ~~~~~~~~~~~~~~~ 204 | [Perplexity] 205 | # Model name to use with Perplexity API 206 | # Model = llama-3.1-sonar-small-128k-online 207 | Model = sonar 208 | 209 | # Maximum tokens for Perplexity API response 210 | MaxTokens = 1024 211 | 212 | # Temperature for Perplexity API response 213 | Temperature = 0.0 214 | 215 | # Retry settings for Perplexity API 216 | MaxRetries = 3 217 | RetryDelay = 25 218 | Timeout = 30 219 | 220 | # Chunk size for translation 221 | ChunkSize = 500 222 | 223 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 224 | # U.S. National Weather Service (NWS) 225 | # (weather.gov) 226 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 227 | [NWS] 228 | # Set NwsOnlyEligibleCountries to False if you want to include U.S. NWS calls outside of the country list. 229 | NwsOnlyEligibleCountries = True 230 | # 'NwsEligibleCountries' is a configurable list of countries (in ISO-3166 country code format) 231 | # that are eligible for NWS data. You can add or remove countries from this list as needed. 232 | # (Legend:) 233 | # AQ: Antarctica (for U.S. 
research stations) 234 | # UM: United States Minor Outlying Islands (like Wake Island, Midway Atoll, etc.) 235 | # XW: International Waters (this isn't an official ISO code but could be used as a placeholder for maritime areas under U.S. influence or international jurisdictions) 236 | # ZZ: Unknown or undefined region (could be used as a placeholder for situations where precise location data isn't available or relevant) 237 | NwsEligibleCountries = US, PR, GU, AS, VI, MP, CA, MX, AQ, UM, XW, ZZ 238 | # Fetch NWS forecasts and/or alerts (true/false) 239 | # Note that the service can be slow and unreliable at times. 240 | # I recommend getting the alerts to supplement e.g. OpenWeatherMap. 241 | # The alerts usually work, but sadly their open API forecasts are often broken. 242 | FetchNWSForecast = false 243 | FetchNWSAlerts = true 244 | NWSUserAgent = ChatKekeWeather/1.0 (flyingfathead@protonmail.com) 245 | NWSRetries = 3 246 | NWSRetryDelay = 2 247 | -------------------------------------------------------------------------------- /src/api_perplexity_search.py: -------------------------------------------------------------------------------- 1 | # # # api_perplexity_search.py 2 | # # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # # # https://github.com/FlyingFathead/TelegramBot-OpenAI-API/ 4 | # # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | import re 7 | import openai 8 | import httpx 9 | import logging 10 | import os 11 | import asyncio 12 | import configparser 13 | import random 14 | from config_paths import CONFIG_PATH 15 | 16 | # Load the configuration file 17 | config = configparser.ConfigParser() 18 | config.read(CONFIG_PATH) 19 | 20 | # Perplexity API model to use -- NOTE: the models keep on changing; latest list is at: https://docs.perplexity.ai/guides/model-cards 21 | # As of December 2024/January 2025, the latest model is in the llama-3.1 family, e.g.: "llama-3.1-sonar-large-128k-online" (can be small/large/huge) 22 | DEFAULT_PERPLEXITY_MODEL = "sonar" 23 | DEFAULT_PERPLEXITY_MAX_TOKENS = 1024 24 | DEFAULT_PERPLEXITY_TEMPERATURE = 0.0 25 | DEFAULT_PERPLEXITY_MAX_RETRIES = 3 26 | DEFAULT_PERPLEXITY_RETRY_DELAY = 25 27 | DEFAULT_PERPLEXITY_TIMEOUT = 30 28 | DEFAULT_CHUNK_SIZE = 1000 29 | PERPLEXITY_MODEL = config.get('Perplexity', 'Model', fallback=DEFAULT_PERPLEXITY_MODEL) 30 | PERPLEXITY_MAX_TOKENS = config.getint('Perplexity', 'MaxTokens', fallback=DEFAULT_PERPLEXITY_MAX_TOKENS) 31 | PERPLEXITY_TEMPERATURE = config.getfloat('Perplexity', 'Temperature', fallback=DEFAULT_PERPLEXITY_TEMPERATURE) 32 | PERPLEXITY_MAX_RETRIES = config.getint('Perplexity', 'MaxRetries', fallback=DEFAULT_PERPLEXITY_MAX_RETRIES) 33 | PERPLEXITY_RETRY_DELAY = config.getint('Perplexity', 'RetryDelay', fallback=DEFAULT_PERPLEXITY_RETRY_DELAY) 34 | PERPLEXITY_TIMEOUT = config.getint('Perplexity', 'Timeout', fallback=DEFAULT_PERPLEXITY_TIMEOUT) 35 | CHUNK_SIZE = config.getint('Perplexity', 'ChunkSize', fallback=DEFAULT_CHUNK_SIZE) 36 | PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") 37 | MAX_TELEGRAM_MESSAGE_LENGTH = 4000 38 | 39 | async def fact_check_with_perplexity(question: str): 40 | url = "https://api.perplexity.ai/chat/completions" 41 | headers = { 42 | "Authorization": f"Bearer {PERPLEXITY_API_KEY}", 43 | "Content-Type": "application/json", 44 | "Accept": "application/json", 45 | } 46 | data = { 47 | "model": PERPLEXITY_MODEL, 48 | "stream": False, 49 | "max_tokens": PERPLEXITY_MAX_TOKENS, 50 | "temperature": PERPLEXITY_TEMPERATURE, 51 | "messages": [{"role": 
"user", "content": question}] 52 | } 53 | 54 | async with httpx.AsyncClient(timeout=PERPLEXITY_TIMEOUT) as client: 55 | for attempt in range(PERPLEXITY_MAX_RETRIES): 56 | try: 57 | response = await client.post(url, json=data, headers=headers) 58 | if response.status_code == 200: 59 | return response.json() 60 | elif response.status_code == 500: 61 | logging.error("Perplexity API returned a 500 server error.") 62 | return {"error": "server_error"} 63 | else: 64 | logging.error(f"Perplexity API Error: {response.text}") 65 | except (httpx.RequestError, httpx.HTTPStatusError) as e: 66 | logging.error(f"Error while calling Perplexity API: {e}") 67 | 68 | backoff_delay = min(PERPLEXITY_RETRY_DELAY, (2 ** attempt) + random.uniform(0, 1)) 69 | await asyncio.sleep(backoff_delay) 70 | 71 | return None 72 | 73 | async def query_perplexity(bot, chat_id, question: str): 74 | logging.info(f"Querying Perplexity with question: {question}") 75 | response_data = await fact_check_with_perplexity(question) 76 | 77 | if response_data and 'choices' in response_data: 78 | bot_reply_content = response_data['choices'][0].get('message', {}).get('content', "").strip() 79 | if bot_reply_content: 80 | return bot_reply_content 81 | else: 82 | logging.warning("Processed content is empty after stripping.") 83 | return "Received an empty response, please try again." 84 | elif response_data and response_data.get('error') == 'server_error': 85 | logging.error("Perplexity API server error.") 86 | return "Perplexity API is currently unavailable due to server issues. Please try again later." 87 | else: 88 | logging.error("Unexpected response structure from Perplexity API.") 89 | return "Error interpreting the response." 90 | 91 | # Utilities 92 | def smart_chunk(text, chunk_size=CHUNK_SIZE): 93 | chunks = [] 94 | blocks = text.split('\n\n') 95 | current_chunk = "" 96 | 97 | for block in blocks: 98 | if len(current_chunk) + len(block) + 2 <= chunk_size: 99 | current_chunk += block + "\n\n" 100 | else: 101 | if current_chunk: 102 | chunks.append(current_chunk.strip()) 103 | current_chunk = "" 104 | 105 | if len(block) > chunk_size: 106 | lines = block.split('\n') 107 | temp_chunk = "" 108 | 109 | for line in lines: 110 | if len(temp_chunk) + len(line) + 1 <= chunk_size: 111 | temp_chunk += line + "\n" 112 | else: 113 | if temp_chunk: 114 | chunks.append(temp_chunk.strip()) 115 | temp_chunk = "" 116 | sentences = re.split('([.!?] 
)', line) 117 | sentence_chunk = "" 118 | for sentence in sentences: 119 | if sentence.strip(): 120 | if len(sentence_chunk) + len(sentence) <= chunk_size: 121 | sentence_chunk += sentence 122 | else: 123 | if sentence_chunk: 124 | chunks.append(sentence_chunk.strip()) 125 | sentence_chunk = "" 126 | sentence_chunk = sentence 127 | if sentence_chunk: 128 | chunks.append(sentence_chunk.strip()) 129 | else: 130 | current_chunk = block + "\n\n" 131 | 132 | if current_chunk.strip(): 133 | chunks.append(current_chunk.strip()) 134 | 135 | return chunks 136 | 137 | def rejoin_chunks(chunks): 138 | rejoined_text = "" 139 | for i, chunk in enumerate(chunks): 140 | trimmed_chunk = chunk.strip() 141 | if i == 0: 142 | rejoined_text += trimmed_chunk 143 | else: 144 | if rejoined_text.endswith('\n\n'): 145 | if not trimmed_chunk.startswith('- ') and not trimmed_chunk.startswith('### ') and not trimmed_chunk.startswith('## '): 146 | rejoined_text += '\n' + trimmed_chunk 147 | else: 148 | rejoined_text += trimmed_chunk 149 | else: 150 | rejoined_text += '\n\n' + trimmed_chunk 151 | return rejoined_text 152 | 153 | def format_headers_for_telegram(translated_response): 154 | lines = translated_response.split('\n') 155 | formatted_lines = [] 156 | 157 | for i, line in enumerate(lines): 158 | if line.startswith('####'): 159 | if i > 0 and lines[i - 1].strip() != '': 160 | formatted_lines.append('') 161 | formatted_line = '<b>◦ ' + line[4:].strip() + '</b>' 162 | formatted_lines.append(formatted_line) 163 | if i < len(lines) - 1 and lines[i + 1].strip() != '': 164 | formatted_lines.append('') 165 | elif line.startswith('###'): 166 | if i > 0 and lines[i - 1].strip() != '': 167 | formatted_lines.append('') 168 | formatted_line = '<b>• ' + line[3:].strip() + '</b>' 169 | formatted_lines.append(formatted_line) 170 | if i < len(lines) - 1 and lines[i + 1].strip() != '': 171 | formatted_lines.append('') 172 | elif line.startswith('##'): 173 | if i > 0 and lines[i - 1].strip() != '': 174 | formatted_lines.append('') 175 | formatted_line = '<b>➤ ' + line[2:].strip() + '</b>' 176 | formatted_lines.append(formatted_line) 177 | if i < len(lines) - 1 and lines[i + 1].strip() != '': 178 | formatted_lines.append('') 179 | else: 180 | formatted_lines.append(line) 181 | 182 | formatted_response = '\n'.join(formatted_lines) 183 | return formatted_response 184 | 185 | def markdown_to_html(md_text): 186 | html_text = re.sub(r'\$\$(.*?)\$\$', r'<pre>\1</pre>', md_text) 187 | html_text = re.sub(r'\\\[(.*?)\\\]', r'<pre>\1</pre>', html_text) 188 | html_text = re.sub(r'^#### (.*)', r'<b>\1</b>', html_text, flags=re.MULTILINE) 189 | html_text = re.sub(r'^### (.*)', r'<b>\1</b>', html_text, flags=re.MULTILINE) 190 | html_text = re.sub(r'^## (.*)', r'<b>\1</b>', html_text, flags=re.MULTILINE) 191 | html_text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', html_text) 192 | html_text = re.sub(r'\*(.*?)\*|_(.*?)_', r'<i>\1\2</i>', html_text) 193 | html_text = re.sub(r'\[(.*?)\]\((.*?)\)', r'<a href="\2">\1</a>', html_text) 194 | html_text = re.sub(r'`(.*?)`', r'<code>\1</code>', html_text) 195 | html_text = re.sub(r'```(.*?)```', r'<pre>\1</pre>
', html_text, flags=re.DOTALL) 196 | return html_text 197 | 198 | def sanitize_urls(text): 199 | url_pattern = re.compile(r'<(http[s]?://[^\s<>]+)>') 200 | sanitized_text = re.sub(url_pattern, r'\1', text) 201 | return sanitized_text 202 | 203 | # split long messages 204 | def split_message(text, max_length=MAX_TELEGRAM_MESSAGE_LENGTH): 205 | paragraphs = text.split('\n') 206 | chunks = [] 207 | current_chunk = "" 208 | 209 | for paragraph in paragraphs: 210 | if len(current_chunk) + len(paragraph) + 1 <= max_length: 211 | current_chunk += paragraph + "\n" 212 | else: 213 | if current_chunk: 214 | chunks.append(current_chunk.strip()) 215 | current_chunk = paragraph + "\n" 216 | 217 | if current_chunk.strip(): 218 | chunks.append(current_chunk.strip()) 219 | 220 | # Further split chunks that are still too large 221 | final_chunks = [] 222 | for chunk in chunks: 223 | while len(chunk) > max_length: 224 | split_point = chunk.rfind('.', 0, max_length) 225 | if split_point == -1: 226 | split_point = max_length 227 | final_chunks.append(chunk[:split_point].strip()) 228 | chunk = chunk[split_point:].strip() 229 | if chunk: 230 | final_chunks.append(chunk.strip()) 231 | 232 | logging.info(f"Total number of chunks created: {len(final_chunks)}") 233 | return final_chunks 234 | 235 | async def send_split_messages(context, chat_id, text): 236 | chunks = split_message(text) 237 | logging.info(f"Total number of chunks to be sent: {len(chunks)}") 238 | 239 | for chunk in chunks: 240 | if not chunk.strip(): 241 | logging.warning("send_split_messages attempted to send an empty chunk. Skipping.") 242 | continue 243 | 244 | logging.info(f"Sending chunk with length: {len(chunk)}") 245 | await context.bot.send_message(chat_id=chat_id, text=chunk, parse_mode='HTML') 246 | logging.info(f"Sent chunk with length: {len(chunk)}") 247 | logging.info("send_split_messages completed.") 248 | 249 | async def handle_long_response(context, chat_id, long_response_text): 250 | if not long_response_text.strip(): 251 | logging.warning("handle_long_response received an empty message. Skipping.") 252 | return 253 | 254 | logging.info(f"Handling long response with text length: {len(long_response_text)}") 255 | await send_split_messages(context, chat_id, long_response_text) 256 | 257 | # language detection over OpenAI API 258 | async def detect_language(bot, text): 259 | prompt = f"Detect the language of the following text:\n\n{text}\n\nRespond with only the language code, e.g., 'en' for English, 'fi' for Finnish, 'jp' for Japanese. HINT: If the query starts off with i.e. 'kuka', 'mikä', 'mitä' or 'missä', 'milloin', 'miksi', 'minkä', 'minkälainen', 'mikä', 'kenen', 'kenenkä', 'keiden', 'kenestä, 'kelle', 'keneltä', 'kenelle', it's probably in Finnish ('fi')." 
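    # The model is expected to reply with a bare language code (e.g. 'en' or 'fi'),
    # which is read verbatim from the first completion choice below.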
260 | 261 | payload = { 262 | "model": bot.model, 263 | "messages": [ 264 | {"role": "system", "content": "You are a language detection assistant."}, 265 | {"role": "user", "content": prompt} 266 | ], 267 | "temperature": 0, 268 | "max_tokens": 10 269 | } 270 | 271 | headers = { 272 | "Content-Type": "application/json", 273 | "Authorization": f"Bearer {bot.openai_api_key}" 274 | } 275 | 276 | try: 277 | async with httpx.AsyncClient() as client: 278 | response = await client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers) 279 | response.raise_for_status() 280 | detected_language = response.json()['choices'][0]['message']['content'].strip() 281 | logging.info(f"Detected language: {detected_language}") 282 | return detected_language 283 | except httpx.RequestError as e: 284 | logging.error(f"RequestError while calling OpenAI API: {e}") 285 | except httpx.HTTPStatusError as e: 286 | logging.error(f"HTTPStatusError while calling OpenAI API: {e}") 287 | except Exception as e: 288 | logging.error(f"Unexpected error while calling OpenAI API: {e}") 289 | return 'en' # Default to English in case of an error 290 | -------------------------------------------------------------------------------- /src/rag_elasticsearch/qa_to_json.py: -------------------------------------------------------------------------------- 1 | # qa_to_json.py 2 | # a part of the `elasticsearch_db` toolkit 3 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | # github.com/FlyingFathead/TelegramBot-OpenAI-API/ 5 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | 7 | import os 8 | import json 9 | import sys 10 | from elasticsearch import Elasticsearch 11 | from argparse import ArgumentParser 12 | 13 | def parse_qa_text(file_path): 14 | qa_pairs = [] 15 | with open(file_path, 'r', encoding='utf-8') as file: 16 | content = file.read() 17 | qa_blocks = content.split('###') 18 | for block in qa_blocks: 19 | lines = block.strip().split('\n') 20 | if len(lines) < 2: 21 | continue 22 | current_pair = {'question': '', 'answer': '', 'references': ''} 23 | is_answer = False 24 | for line in lines: 25 | if line.startswith('> '): 26 | if current_pair['question'] and current_pair['answer']: 27 | qa_pairs.append(current_pair) 28 | current_pair = {'question': '', 'answer': '', 'references': ''} 29 | current_pair['question'] = line[2:].strip() 30 | is_answer = False 31 | elif line.startswith('>> '): 32 | is_answer = True 33 | if current_pair['answer']: 34 | current_pair['answer'] += '\n' 35 | current_pair['answer'] += line[3:].strip() 36 | elif line.startswith('## '): 37 | current_pair['references'] = line[3:].strip() 38 | elif is_answer: 39 | current_pair['answer'] += '\n' + line.strip() 40 | if current_pair['question'] and current_pair['answer']: 41 | qa_pairs.append(current_pair) 42 | return qa_pairs 43 | 44 | def add_to_index(es, index, qa_pairs, backup_file): 45 | for pair in qa_pairs: 46 | es.index(index=index, body=pair) 47 | backup_to_json(backup_file, qa_pairs) 48 | 49 | def interactive_mode(es, index, backup_file): 50 | while True: 51 | mode = input("Choose mode - [s]ingle question, [m]ulti-question, [b]atch input (or type 'exit' to finish): ") 52 | if mode.lower() == 'exit': 53 | break 54 | 55 | questions = [] 56 | if mode.lower() == 's': 57 | question = input("Enter your question: ") 58 | if question.strip(): 59 | questions.append(question) 60 | elif mode.lower() == 'm' or mode.lower() == 'b': 61 | prompt_text = "Enter your questions, one per line. 
When finished, press Enter on an empty line:" if mode.lower() == 'b' else "Enter your question (or type 'done' to finish questions): " 62 | print(prompt_text) if mode.lower() == 'b' else None 63 | while True: 64 | question = input() if mode.lower() == 'b' else input("Enter your question (or type 'done' to finish questions): ") 65 | if question == "" and mode.lower() == 'b': 66 | break 67 | if question.lower() == 'done' and mode.lower() == 'm': 68 | break 69 | if question.strip(): 70 | questions.append(question.strip()) 71 | 72 | if not questions: 73 | print("No questions entered. Skipping to next entry.") 74 | continue 75 | 76 | answer = input("Enter the answer: ") 77 | references = input("Enter any references (optional): ") 78 | qa_pairs = [{'question': q, 'answer': answer, 'references': references} for q in questions] 79 | 80 | for pair in qa_pairs: 81 | print("\nQ&A pair generated:") 82 | print("<" + "-"*72 + ">") 83 | print("Q:", pair["question"]) 84 | print("A:", pair["answer"]) 85 | if references: 86 | print("Ref:", references) 87 | print("<" + "-"*72 + ">") 88 | 89 | confirm = input("Add to index (y/n)? ") 90 | if confirm.lower() == 'y': 91 | add_to_index(es, index, qa_pairs, backup_file) 92 | print(f"Added {len(qa_pairs)} Q&A pairs to Elasticsearch index '{index}' and backed up to JSON file.") 93 | else: 94 | print("No Q&A pairs were added.") 95 | 96 | def backup_to_json(file_path, qa_pairs): 97 | try: 98 | data = [] 99 | if os.path.exists(file_path): 100 | with open(file_path, 'r', encoding='utf-8') as file: 101 | data = json.load(file) 102 | data.extend(qa_pairs) 103 | 104 | # Validate JSON data before writing 105 | try: 106 | json.dumps(data) 107 | except json.JSONDecodeError as e: 108 | raise ValueError(f"Invalid JSON data: {e}") 109 | 110 | with open(file_path, 'w', encoding='utf-8') as file: 111 | json.dump(data, file, indent=4, ensure_ascii=False) 112 | except Exception as e: 113 | print(f"Failed to backup Q&A pairs to JSON: {e}") 114 | 115 | def main(): 116 | 117 | backup_file = "./backup_file.json" 118 | 119 | parser = ArgumentParser(description="Parse Q&A text and optionally add to Elasticsearch index.") 120 | parser.add_argument("file_path", nargs='?', help="Path to the Q&A text file.", default=None) 121 | parser.add_argument("--addtoindex", action="store_true", help="If set, add parsed Q&A pairs to Elasticsearch index.") 122 | parser.add_argument("--index", default="tg-bot-rag-index", help="Elasticsearch index name. Default is 'tg-bot-rag-index'.") 123 | parser.add_argument("--interactive", action="store_true", help="Enable interactive mode to add Q&A pairs.") 124 | args = parser.parse_args() 125 | 126 | if args.interactive: 127 | es = Elasticsearch(["http://localhost:9200"]) 128 | if not es.ping(): 129 | print("Could not connect to Elasticsearch.") 130 | sys.exit(1) 131 | interactive_mode(es, args.index, backup_file) 132 | elif args.file_path: 133 | parsed_data = parse_qa_text(args.file_path) 134 | 135 | # Validate parsed data before proceeding 136 | try: 137 | json.dumps(parsed_data) 138 | except json.JSONDecodeError as e: 139 | print(f"Invalid JSON data: {e}") 140 | sys.exit(1) 141 | 142 | if args.addtoindex: 143 | print("Q&A pairs generated:") 144 | for pair in parsed_data: 145 | print("<" + "-"*72 + ">") 146 | print("Q:", pair["question"]) 147 | print("A:", pair["answer"]) 148 | if pair["references"]: 149 | print("Ref:", pair["references"]) 150 | print("<" + "-"*72 + ">\n") 151 | 152 | confirm = input("Add to index (y/n)? 
") 153 | if confirm.lower() != 'y': 154 | print("Operation cancelled by the user.") 155 | sys.exit(0) 156 | 157 | es = Elasticsearch(["http://localhost:9200"]) 158 | if not es.ping(): 159 | print("Could not connect to Elasticsearch.") 160 | sys.exit(1) 161 | add_to_index(es, args.index, parsed_data, backup_file) 162 | print(f"Added {len(parsed_data)} Q&A pairs to Elasticsearch index '{args.index}'.") 163 | else: 164 | print(json.dumps(parsed_data, indent=4, ensure_ascii=False)) 165 | else: 166 | print("Please provide a file path or enable interactive mode.") 167 | sys.exit(1) 168 | 169 | if __name__ == "__main__": 170 | main() 171 | 172 | 173 | # import os 174 | # import json 175 | # import sys 176 | # from elasticsearch import Elasticsearch 177 | # from argparse import ArgumentParser 178 | 179 | # def parse_qa_text(file_path): 180 | # qa_pairs = [] 181 | # with open(file_path, 'r', encoding='utf-8') as file: 182 | # content = file.read() 183 | # qa_blocks = content.split('###') 184 | # for block in qa_blocks: 185 | # lines = block.strip().split('\n') 186 | # if len(lines) < 2: 187 | # continue 188 | # current_pair = {'question': '', 'answer': '', 'references': ''} 189 | # is_answer = False 190 | # for line in lines: 191 | # if line.startswith('> '): 192 | # if current_pair['question'] and current_pair['answer']: 193 | # qa_pairs.append(current_pair) 194 | # current_pair = {'question': '', 'answer': '', 'references': ''} 195 | # current_pair['question'] = line[2:].strip() 196 | # is_answer = False 197 | # elif line.startswith('>> '): 198 | # is_answer = True 199 | # if current_pair['answer']: 200 | # current_pair['answer'] += '\n' 201 | # current_pair['answer'] += line[3:].strip() 202 | # elif line.startswith('## '): 203 | # current_pair['references'] = line[3:].strip() 204 | # elif is_answer: 205 | # current_pair['answer'] += '\n' + line.strip() 206 | # if current_pair['question'] and current_pair['answer']: 207 | # qa_pairs.append(current_pair) 208 | # return qa_pairs 209 | 210 | # def add_to_index(es, index, qa_pairs, backup_file): 211 | # for pair in qa_pairs: 212 | # es.index(index=index, body=pair) 213 | # backup_to_json(backup_file, qa_pairs) # Call backup function after adding to Elasticsearch 214 | 215 | # def interactive_mode(es, index, backup_file): 216 | # while True: 217 | # mode = input("Choose mode - [s]ingle question, [m]ulti-question, [b]atch input (or type 'exit' to finish): ") 218 | # if mode.lower() == 'exit': 219 | # break 220 | 221 | # questions = [] 222 | # if mode.lower() == 's': 223 | # question = input("Enter your question: ") 224 | # if question.strip(): # Ensure the question is not empty or whitespace 225 | # questions.append(question) 226 | # elif mode.lower() == 'm' or mode.lower() == 'b': 227 | # prompt_text = "Enter your questions, one per line. 
") 306 | # if confirm.lower() != 'y': 307 | # print("Operation cancelled by the user.") 308 | # sys.exit(0) 309 | 310 | # es = Elasticsearch(["http://localhost:9200"]) 311 | # if not es.ping(): 312 | # print("Could not connect to Elasticsearch.") 313 | # sys.exit(1) 314 | # add_to_index(es, args.index, parsed_data) 315 | # print(f"Added {len(parsed_data)} Q&A pairs to Elasticsearch index '{args.index}'.") 316 | # else: 317 | # print(json.dumps(parsed_data, indent=4, ensure_ascii=False)) 318 | # else: 319 | # print("Please provide a file path or enable interactive mode.") 320 | # sys.exit(1) 321 | 322 | # if __name__ == "__main__": 323 | # main() 324 | 325 | # # old code for reference => 326 | # """ def interactive_mode(es, index): 327 | # qa_pairs = [] 328 | # while True: 329 | # question = input("Enter your question (or type 'exit' to finish): ") 330 | # if question == 'exit': 331 | # break 332 | # answer = input("Enter the answer: ") 333 | # references = input("Enter any references (optional): ") 334 | # print("\nQ&A pair generated:") 335 | # print("<" + "-"*72 + ">") 336 | # print("Q:", question) 337 | # print("A:", answer) 338 | # if references: 339 | # print("Ref:", references) 340 | # print("<" + "-"*72 + ">") 341 | 342 | # confirm = input("Add to index (y/n)? ") 343 | # if confirm.lower() == 'y': 344 | # qa_pairs.append({'question': question, 'answer': answer, 'references': references}) 345 | 346 | # if qa_pairs: 347 | # add_to_index(es, index, qa_pairs) 348 | # print(f"Added {len(qa_pairs)} Q&A pairs to Elasticsearch index '{index}'.") 349 | # else: 350 | # print("No Q&A pairs were added.") """ -------------------------------------------------------------------------------- /src/bot_commands.py: -------------------------------------------------------------------------------- 1 | # bot_commands.py 2 | # for telegram 3 | from telegram import Update, Bot 4 | from telegram.ext import Application, MessageHandler, filters, CommandHandler, CallbackContext 5 | from telegram.constants import ParseMode 6 | from telegram.helpers import escape_markdown 7 | from functools import partial 8 | 9 | import json 10 | import os 11 | import datetime 12 | import logging 13 | 14 | # bot's modules 15 | from config_paths import CONFIG_PATH 16 | from token_usage_visualization import generate_usage_chart 17 | from modules import reset_token_usage_at_midnight 18 | 19 | # ~~~~~~~~~~~~~~ 20 | # admin commands 21 | # ~~~~~~~~~~~~~~ 22 | 23 | # /admin (admin commands help menu) 24 | async def admin_command(update: Update, context: CallbackContext, bot_owner_id): 25 | if bot_owner_id == '0': 26 | await update.message.reply_text("The /admin command is disabled.") 27 | return 28 | 29 | if str(update.message.from_user.id) == bot_owner_id: 30 | admin_commands = """ 31 | Admin Commands: 32 | - /viewconfig: View the bot configuration (from config.ini). 33 | - /usage: View the bot's daily token usage in plain text. 34 | - /usagechart: View the bot's daily token usage as a chart. 35 | - /reset: Reset the bot's context memory. 36 | - /resetsystemmessage: Reset the system message from config.ini. 37 | - /setsystemmessage <system message>: Set a new system message (note: not saved into config). 
38 | """ 39 | await update.message.reply_text(admin_commands, parse_mode=ParseMode.HTML) 40 | else: 41 | await update.message.reply_text("You are not authorized to use this command.") 42 | 43 | # /restart (admin command) 44 | async def restart_command(update: Update, context: CallbackContext, bot_owner_id): 45 | if bot_owner_id == '0': 46 | await update.message.reply_text("The /restart command is disabled.") 47 | return 48 | 49 | if str(update.message.from_user.id) == bot_owner_id: 50 | # WIP: Implement restart logic here 51 | await update.message.reply_text("Restarting the bot...") 52 | else: 53 | await update.message.reply_text("You are not authorized to use this command.") 54 | 55 | # /resetdailytokens (admin command for resetting daily token usage) 56 | async def reset_daily_tokens_command(update: Update, context: CallbackContext, bot_instance): 57 | user_id = update.message.from_user.id 58 | if bot_instance.bot_owner_id == '0' or str(user_id) != bot_instance.bot_owner_id: 59 | logging.info(f"User {user_id} tried to use /resetdailytokens but was not authorized.") 60 | await update.message.reply_text("You are not authorized to use this command.") 61 | return 62 | 63 | try: 64 | 65 | # (old fallback method, JIC) 66 | # Reset the in-memory token usage counter 67 | # bot_instance.total_token_usage = 0 68 | # logging.info("In-memory token usage counter reset.") 69 | 70 | # Pass the reset_total_token_usage method as a callback to reset_token_usage_at_midnight 71 | reset_token_usage_at_midnight(bot_instance.token_usage_file, bot_instance.reset_total_token_usage) 72 | logging.info(f"User {user_id} has reset the daily token usage, including the in-memory token usage counter.") 73 | await update.message.reply_text("Daily token usage has been reset, including the in-memory token usage counter.") 74 | 75 | except Exception as e: 76 | logging.error(f"Failed to reset daily token usage: {e}") 77 | await update.message.reply_text("Failed to reset daily token usage.") 78 | 79 | # /resetsystemmessage (admin command) 80 | async def reset_system_message_command(update: Update, context: CallbackContext, bot_instance): 81 | user_id = update.message.from_user.id 82 | if bot_instance.bot_owner_id == '0' or str(user_id) != bot_instance.bot_owner_id: 83 | logging.info(f"User {user_id} tried to use /resetsystemmessage but was not authorized.") 84 | await update.message.reply_text("You are not authorized to use this command.") 85 | return 86 | 87 | old_system_message = bot_instance.system_instructions 88 | bot_instance.system_instructions = bot_instance.config.get('SystemInstructions', 'You are an OpenAI API-based chatbot on Telegram.') 89 | logging.info(f"User {user_id} reset the system message to default.") 90 | await update.message.reply_text(f"System message reset to default.\n\nOld Message:\n{old_system_message}\n----------------------\nNew Default Message:\n{bot_instance.system_instructions}", parse_mode=ParseMode.HTML) 91 | 92 | # /setsystemmessage (admin command) 93 | async def set_system_message_command(update: Update, context: CallbackContext, bot_instance): 94 | user_id = update.message.from_user.id 95 | if bot_instance.bot_owner_id == '0' or str(user_id) != bot_instance.bot_owner_id: 96 | logging.info(f"User {user_id} tried to use /setsystemmessage but was not authorized.") 97 | await update.message.reply_text("You are not authorized to use this command.") 98 | return 99 | 100 | new_system_message = ' '.join(context.args) 101 | if new_system_message: 102 | old_system_message = 
bot_instance.system_instructions 103 | bot_instance.system_instructions = new_system_message 104 | logging.info(f"User {user_id} updated the system message to: {new_system_message}") 105 | await update.message.reply_text(f"System message updated.\n\nOld Message: {old_system_message}\nNew Message: {new_system_message}", parse_mode=ParseMode.HTML) 106 | else: 107 | logging.info(f"User {user_id} attempted to set system message but provided no new message.") 108 | await update.message.reply_text("Please provide the new system message in the command line, i.e.: /setsystemmessage My new system message to the AI on what it is, where it is, etc.") 109 | 110 | 111 | # /usage (admin command) 112 | # bot_commands.py 113 | async def usage_command(update: Update, context: CallbackContext): 114 | bot_instance = context.bot_data.get('bot_instance') # Retrieve the bot instance from context 115 | 116 | if not bot_instance: 117 | await update.message.reply_text("Internal error: Bot instance not found.") 118 | logging.error("Bot instance not found in context.bot_data") 119 | return 120 | 121 | logging.info(f"User {update.message.from_user.id} invoked /usage command") 122 | 123 | if bot_instance.bot_owner_id == '0': 124 | await update.message.reply_text("The `/usage` command is disabled.") 125 | logging.info("Usage command is disabled until a bot owner is defined in `config.ini`.") 126 | return 127 | 128 | if str(update.message.from_user.id) != bot_instance.bot_owner_id: 129 | await update.message.reply_text("You don't have permission to use this command.") 130 | logging.info(f"User {update.message.from_user.id} does not have permission to use /usage") 131 | return 132 | 133 | # Correct path to token_usage.json inside logs/ directory 134 | # token_usage_file = os.path.join(bot_instance.data_directory, 'logs', 'token_usage.json') 135 | token_usage_file = os.path.join(bot_instance.logs_directory, 'token_usage.json') 136 | 137 | logging.info(f"Looking for token usage file at: {token_usage_file}") 138 | current_date = datetime.datetime.utcnow() 139 | 140 | try: 141 | if os.path.exists(token_usage_file): 142 | with open(token_usage_file, 'r') as file: 143 | token_usage_history = json.load(file) 144 | logging.info("Loaded token usage history successfully") 145 | 146 | # Prune token usage history 147 | cutoff_date = current_date - datetime.timedelta(days=bot_instance.max_history_days) 148 | token_usage_history = { 149 | date: usage for date, usage in token_usage_history.items() 150 | if datetime.datetime.strptime(date, '%Y-%m-%d') >= cutoff_date 151 | } 152 | logging.info("Pruned token usage history based on cutoff date") 153 | else: 154 | token_usage_history = {} 155 | logging.warning(f"Token usage file does not exist at: {token_usage_file}") 156 | except json.JSONDecodeError: 157 | await update.message.reply_text("Error reading token usage history.") 158 | logging.error("JSONDecodeError while reading token_usage.json") 159 | return 160 | except Exception as e: 161 | await update.message.reply_text(f"An unexpected error occurred: {e}") 162 | logging.error(f"Unexpected error in usage_command: {e}") 163 | return 164 | 165 | today_usage = token_usage_history.get(current_date.strftime('%Y-%m-%d'), 0) 166 | token_cap_info = ( 167 | f"Today's usage: {today_usage} tokens\n" 168 | f"Daily token cap: {'No cap' if bot_instance.max_tokens_config == 0 else f'{bot_instance.max_tokens_config} tokens'}\n\n" 169 | "Token Usage History:\n" 170 | ) 171 | 172 | for date, usage in sorted(token_usage_history.items()): 173 | 
token_cap_info += f"{date}: {usage} tokens\n" 174 | 175 | await update.message.reply_text(token_cap_info) 176 | logging.info("Sent usage information to user") 177 | 178 | # /usagechart (admin command) 179 | async def usage_chart_command(update: Update, context: CallbackContext): 180 | bot_instance = context.bot_data.get('bot_instance') # Retrieve the bot instance from context 181 | 182 | if not bot_instance: 183 | await update.message.reply_text("Internal error: Bot instance not found.") 184 | logging.error("Bot instance not found in context.bot_data") 185 | return 186 | 187 | logging.info(f"User {update.message.from_user.id} invoked /usagechart command") 188 | 189 | if bot_instance.bot_owner_id == '0': 190 | await update.message.reply_text("The `/usagechart` command is disabled.") 191 | logging.info("Usagechart command is disabled") 192 | return 193 | 194 | if str(update.message.from_user.id) != bot_instance.bot_owner_id: 195 | await update.message.reply_text("You don't have permission to use this command.") 196 | logging.info(f"User {update.message.from_user.id} does not have permission to use /usagechart") 197 | return 198 | 199 | # Define paths 200 | token_usage_file = os.path.join(bot_instance.logs_directory, 'token_usage.json') 201 | output_image_file = os.path.join(bot_instance.data_directory, 'token_usage_chart.png') 202 | 203 | logging.info(f"Looking for token usage file at: {token_usage_file}") 204 | logging.info(f"Output image file will be at: {output_image_file}") 205 | 206 | # Ensure the data directory exists 207 | try: 208 | if not os.path.exists(bot_instance.data_directory): 209 | os.makedirs(bot_instance.data_directory, exist_ok=True) 210 | bot_instance.logger.info(f"Created data directory at {bot_instance.data_directory}") 211 | except OSError as e: 212 | bot_instance.logger.error(f"Failed to create data directory {bot_instance.data_directory}: {e}") 213 | await update.message.reply_text(f"Failed to create the data directory for the chart. Please check the bot's permissions.") 214 | return 215 | 216 | # Generate the usage chart 217 | try: 218 | generate_usage_chart(token_usage_file, output_image_file) 219 | bot_instance.logger.info(f"Generated usage chart at {output_image_file}") 220 | except Exception as e: 221 | bot_instance.logger.error(f"Failed to generate usage chart: {e}") 222 | await update.message.reply_text("Failed to generate usage chart.") 223 | return 224 | 225 | # Try to open and send the generated chart image 226 | try: 227 | with open(output_image_file, 'rb') as file: 228 | await context.bot.send_photo(chat_id=update.message.chat_id, photo=file) 229 | bot_instance.logger.info(f"Sent usage chart to chat_id {update.message.chat_id}") 230 | except FileNotFoundError: 231 | await update.message.reply_text("Token usage chart not found. 
Please ensure it's being generated correctly.") 232 | bot_instance.logger.error("Token usage chart file not found: %s", output_image_file) 233 | except Exception as e: 234 | await update.message.reply_text("Failed to send the usage chart.") 235 | bot_instance.logger.error(f"Error sending usage chart: {e}") 236 | 237 | # /reset 238 | async def reset_command(update: Update, context: CallbackContext, bot_owner_id, reset_enabled, admin_only_reset): 239 | # Check if the /reset command is enabled 240 | if not reset_enabled: 241 | logging.info(f"User tried to use the /reset command, but it was disabled.") 242 | await update.message.reply_text("The /reset command is disabled.") 243 | return 244 | 245 | # Check if the command is admin-only and if the user is the admin 246 | if admin_only_reset and str(update.message.from_user.id) != bot_owner_id: 247 | logging.info(f"User tried to use the /reset command, but was not authorized to do so.") 248 | await update.message.reply_text("You are not authorized to use this command.") 249 | return 250 | 251 | # If the user is authorized, or if the command is not admin-only 252 | if 'chat_history' in context.chat_data: 253 | context.chat_data['chat_history'] = [] 254 | logging.info(f"Memory context was reset successfully with: /reset") 255 | await update.message.reply_text("Memory context reset successfully.") 256 | else: 257 | logging.info(f"No memory context to reset with: /reset") 258 | await update.message.reply_text("No memory context to reset.") 259 | 260 | # /viewconfig (admin command) 261 | async def view_config_command(update: Update, context: CallbackContext, bot_owner_id): 262 | user_id = update.message.from_user.id # Retrieve the user_id 263 | 264 | if bot_owner_id == '0': 265 | logging.info(f"User {user_id} attempted to view the config with: /viewconfig -- command disabled") 266 | await update.message.reply_text("The /viewconfig command is disabled.") 267 | return 268 | 269 | if str(user_id) == bot_owner_id: 270 | try: 271 | config_contents = "
"
272 |             with open(CONFIG_PATH, 'r') as file:
273 |                 for line in file:
274 |                     if not line.strip() or line.strip().startswith('#'):
275 |                         continue
276 |                     # Escape HTML special characters
277 |                     line = line.replace('&', '&').replace('<', '<').replace('>', '>')
278 |                     config_contents += line
279 |             config_contents += "
" 280 | logging.info(f"User {user_id} (owner) viewed the config with: /viewconfig") 281 | if config_contents: 282 | await update.message.reply_text(config_contents, parse_mode=ParseMode.HTML) 283 | else: 284 | logging.info(f"[WARNING] User {user_id} attempted to view the config with: /viewconfig -- no configuration settings were available") 285 | await update.message.reply_text("No configuration settings available.") 286 | except Exception as e: 287 | logging.info(f"[ERROR] User {user_id} attempted to view the config with: /viewconfig -- there was an error reading the config file: {e}") 288 | await update.message.reply_text(f"Error reading configuration file: {e}") 289 | else: 290 | logging.info(f"[ATTENTION] User {user_id} attempted to view the config with: /viewconfig -- access denied") 291 | await update.message.reply_text("You are not authorized to use this command.") 292 | 293 | # ~~~~~~~~~~~~~ 294 | # user commands 295 | # ~~~~~~~~~~~~~ 296 | 297 | # /start 298 | async def start(update: Update, context: CallbackContext, start_command_response): 299 | await update.message.reply_text(start_command_response) 300 | 301 | # /about 302 | async def about_command(update: Update, context: CallbackContext, version_number): 303 | about_text = f""" 304 | 🤖 TelegramBot-OpenAI-API ⚡️ Powered by ChatKeke 🚀 305 | This is an OpenAI-powered Telegram chatbot created by FlyingFathead. 306 | Version: v{version_number} 307 | For more information, visit: https://github.com/FlyingFathead/TelegramBot-OpenAI-API 308 | (The original author is NOT responsible for any chatbots created using the code) 309 | """ 310 | await update.message.reply_text(about_text) 311 | 312 | # /help 313 | async def help_command(update: Update, context: CallbackContext, reset_enabled, admin_only_reset): 314 | help_text = """ 315 | Welcome to this OpenAI API-powered chatbot! Here are some commands you can use: 316 | 317 | - /start: Start a conversation with the bot. 318 | - /help: Display this help message. 319 | - /about: Learn more about this bot. 320 | """ 321 | 322 | if reset_enabled: 323 | help_text += "- /reset: Reset the bot's context memory.\n" 324 | if admin_only_reset: 325 | help_text += " (Available to admin only)\n" 326 | 327 | help_text += "- /admin: (For bot owner only) Display admin commands.\n\nJust type your message to chat with the bot!" 328 | 329 | await update.message.reply_text(help_text) --------------------------------------------------------------------------------