├── modules ├── __init__.py ├── calculator.py ├── keyboard.py ├── chrome_shortcut.py ├── alarm.py ├── Whatsapp.py ├── news.py ├── search.py └── apps.py ├── .gitignore ├── requirements.txt ├── LICENSE ├── config.py ├── README.md └── app.py /modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore virtual environment folder 2 | venv/ 3 | 4 | # Ignore Python bytecode files 5 | *.pyc 6 | __pycache__ 7 | 8 | # Environment variables file 9 | .env -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyttsx3==2.90 2 | SpeechRecognition==3.10.0 3 | spacy==3.7.2 4 | pywhatkit==5.4 5 | wikipedia==1.4.0 6 | pyautogui==0.9.54 7 | pynput==1.7.6 8 | requests==2.31.0 9 | pyjokes==0.6.0 10 | python-dotenv==1.0.0 11 | wolframalpha==5.0.0 12 | pillow>=10.4.0 13 | dotenv==1.0.0 14 | pyjokes==0.6.0 15 | pywhatkit==5.4 16 | wolframalpha==5.0.0 -------------------------------------------------------------------------------- /modules/calculator.py: -------------------------------------------------------------------------------- 1 | import wolframalpha 2 | from config import speak, WOLFRAM_ALPHA_KEY 3 | 4 | def calculate(query: str) -> None: 5 | """Perform mathematical calculations using Wolfram Alpha.""" 6 | query = query.replace("calculate", "").replace("jarvis", "").strip() 7 | query = query.replace("multiply", "*").replace("plus", "+").replace("minus", "-").replace("divide", "/") 8 | try: 9 | client = wolframalpha.Client(WOLFRAM_ALPHA_KEY) 10 | result = next(client.query(query).results).text 11 | speak(f"The answer is {result}") 12 | print(result) 13 | except: 14 | speak("Sorry, I couldn't calculate that.") -------------------------------------------------------------------------------- /modules/keyboard.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, Controller 2 | from time import sleep 3 | from config import speak 4 | 5 | keyboard = Controller() 6 | 7 | def volume_up() -> None: 8 | """Increase system volume.""" 9 | for _ in range(5): 10 | keyboard.press(Key.media_volume_up) 11 | keyboard.release(Key.media_volume_up) 12 | sleep(0.1) 13 | speak("Volume increased.") 14 | 15 | def volume_down() -> None: 16 | """Decrease system volume.""" 17 | for _ in range(5): 18 | keyboard.press(Key.media_volume_down) 19 | keyboard.release(Key.media_volume_down) 20 | sleep(0.1) 21 | speak("Volume decreased.") -------------------------------------------------------------------------------- /modules/chrome_shortcut.py: -------------------------------------------------------------------------------- 1 | import pyautogui 2 | from config import speak 3 | 4 | def handle_shortcut(query: str) -> None: 5 | """Handle keyboard shortcuts for browser actions.""" 6 | shortcuts = { 7 | "select all": ("ctrl", "a"), 8 | "copy": ("ctrl", "c"), 9 | "cut": ("ctrl", "x"), 10 | "paste": ("ctrl", "v"), 11 | "pause": "space", 12 | "resume": "space", 13 | "forward": "right", 14 | "backward": "left", 15 | "change window": ("alt", "tab") 16 | } 17 | for cmd, keys in shortcuts.items(): 18 | if cmd in query: 19 | if isinstance(keys, tuple): 20 | pyautogui.hotkey(*keys) 21 | else: 22 | pyautogui.press(keys) 23 | speak(f"{cmd.capitalize()} executed.") 24 | break -------------------------------------------------------------------------------- /modules/alarm.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from config import speak, ALARM_FILE 4 | 5 | def set_alarm(time_str: str) -> None: 6 | """Set an alarm for the specified time.""" 7 | with ALARM_FILE.open("a") as f: 8 | f.write(f"{time_str}\n") 9 | speak("Alarm set.") 10 | ring(time_str) 11 | 12 | def ring(time_str: str) -> None: 13 | """Check and trigger the alarm.""" 14 | alarm_time = time_str.replace(" and ", ":") 15 | while True: 16 | current_time = datetime.datetime.now().strftime("%H:%M:%S") 17 | if current_time.startswith(alarm_time): 18 | speak("Alarm ringing, sir!") 19 | os.startfile("music.mp3") # Ensure music.mp3 exists 20 | break 21 | datetime.datetime.now().sleep(1) 22 | with ALARM_FILE.open("w") as f: 23 | f.write("") -------------------------------------------------------------------------------- /modules/Whatsapp.py: -------------------------------------------------------------------------------- 1 | import pywhatkit 2 | from datetime import datetime, timedelta 3 | from config import speak 4 | 5 | def send_whatsapp_message() -> None: 6 | """Send a WhatsApp message.""" 7 | speak("Who do you want to message?") 8 | recipient = input("Enter recipient number (e.g., +919925336945) or name (Person 1, Person 2): ") 9 | contacts = { 10 | "person 1": "+919925336945", 11 | "person 2": "+91xxxxxxxxxx" # Add another contact 12 | } 13 | phone = contacts.get(recipient.lower(), recipient) 14 | speak("What's the message?") 15 | message = input("Enter the message: ") 16 | time_hour = int(datetime.now().strftime("%H")) 17 | time_min = int((datetime.now() + timedelta(minutes=2)).strftime("%M")) 18 | try: 19 | pywhatkit.sendwhatmsg(phone, message, time_hour, time_min) 20 | speak("Message scheduled.") 21 | except Exception as e: 22 | speak("Failed to send message.") 23 | print(e) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 rohanmistry231 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /modules/news.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from config import speak, NEWS_API_KEY 4 | 5 | def get_latest_news() -> None: 6 | """Fetch and read the latest news.""" 7 | api_urls = { 8 | "business": f"https://newsapi.org/v2/top-headlines?country=in&category=business&apiKey={NEWS_API_KEY}", 9 | "entertainment": f"https://newsapi.org/v2/top-headlines?country=in&category=entertainment&apiKey={NEWS_API_KEY}", 10 | "health": f"https://newsapi.org/v2/top-headlines?country=in&category=health&apiKey={NEWS_API_KEY}", 11 | "science": f"https://newsapi.org/v2/top-headlines?country=in&category=science&apiKey={NEWS_API_KEY}", 12 | "sports": f"https://newsapi.org/v2/top-headlines?country=in&category=sports&apiKey={NEWS_API_KEY}", 13 | "technology": f"https://newsapi.org/v2/top-headlines?country=in&category=technology&apiKey={NEWS_API_KEY}" 14 | } 15 | speak("Which news category? Business, health, technology, sports, entertainment, or science?") 16 | category = input("Type the category: ").lower() 17 | url = api_urls.get(category) 18 | if not url: 19 | speak("Invalid category.") 20 | return 21 | 22 | try: 23 | response = requests.get(url) 24 | news = json.loads(response.text) 25 | speak("Here are the latest headlines.") 26 | for article in news["articles"][:3]: # Limit to 3 articles 27 | title = article["title"] 28 | print(title) 29 | speak(title) 30 | print(f"More info: {article['url']}") 31 | if input("Continue? (y/n): ").lower() != "y": 32 | break 33 | speak("That's all for now.") 34 | except Exception as e: 35 | speak("Failed to fetch news.") 36 | print(e) -------------------------------------------------------------------------------- /modules/search.py: -------------------------------------------------------------------------------- 1 | import pywhatkit 2 | import wikipedia 3 | import webbrowser 4 | from config import speak 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | def search_query(query: str) -> None: 10 | """Handle search queries for Google, YouTube, or Wikipedia.""" 11 | query = query.lower().strip() 12 | 13 | if "google" in query: 14 | query = query.replace("google search", "").replace("google", "").strip() 15 | speak("Searching Google...") 16 | try: 17 | pywhatkit.search(query) 18 | result = wikipedia.summary(query, sentences=1, auto_suggest=False) 19 | speak(result) 20 | except Exception as e: 21 | logger.error(f"Google search error: {e}") 22 | speak("No additional information available.") 23 | 24 | elif "youtube" in query: 25 | query = query.replace("youtube search", "").replace("youtube", "").strip() 26 | speak("Searching YouTube...") 27 | try: 28 | webbrowser.open(f"https://www.youtube.com/results?search_query={query}") 29 | pywhatkit.playonyt(query) 30 | except Exception as e: 31 | logger.error(f"YouTube search error: {e}") 32 | speak("Failed to search YouTube.") 33 | 34 | elif "wikipedia" in query or "what is" in query or "who is" in query: 35 | query = query.replace("wikipedia", "").replace("search wikipedia", "").replace("what is", "").replace("who is", "").strip() 36 | speak("Searching Wikipedia...") 37 | try: 38 | results = wikipedia.summary(query, sentences=2, auto_suggest=False) 39 | print(results) 40 | speak(results) 41 | except Exception as e: 42 | logger.error(f"Wikipedia search error: {e}") 43 | speak("No results found on Wikipedia.") -------------------------------------------------------------------------------- /modules/apps.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pyautogui 3 | import webbrowser 4 | from time import sleep 5 | from config import speak, APP_MAPPINGS, WEBSITE_MAPPINGS 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | def open_app(query: str) -> None: 11 | """Open an application or website.""" 12 | speak("Launching...") 13 | query = query.lower().replace("open", "").replace("hey", "").strip() 14 | 15 | # Check for website 16 | for site, url in WEBSITE_MAPPINGS.items(): 17 | if site in query: 18 | webbrowser.open(f"https://{url}") 19 | speak(f"Opening {site}") 20 | return 21 | 22 | # Check for application 23 | for app, path in APP_MAPPINGS.items(): 24 | if app in query: 25 | try: 26 | if path.endswith(".exe"): 27 | os.startfile(path) 28 | else: 29 | os.system(f"start {path}") 30 | speak(f"Opening {app}") 31 | except Exception as e: 32 | logger.error(f"Failed to open {app}: {e}") 33 | speak(f"Failed to open {app}") 34 | return 35 | 36 | # Fallback for generic website 37 | if any(x in query for x in [".com", ".co.in", ".org"]): 38 | webbrowser.open(f"https://www.{query}") 39 | speak(f"Opening {query}") 40 | else: 41 | speak("Application or website not recognized.") 42 | 43 | def close_app(query: str) -> None: 44 | """Close an application or browser tabs.""" 45 | speak("Closing...") 46 | query = query.lower() 47 | if "tab" in query: 48 | num_tabs = int(query.split("tab")[0].strip()) if query.split("tab")[0].strip().isdigit() else 1 49 | for _ in range(num_tabs): 50 | pyautogui.hotkey("ctrl", "w") 51 | sleep(0.5) 52 | speak("Tabs closed.") 53 | else: 54 | for app, path in APP_MAPPINGS.items(): 55 | if app in query and not path.endswith(".exe"): 56 | try: 57 | os.system(f"taskkill /f /im {path}.exe") 58 | speak(f"Closed {app}") 59 | except Exception as e: 60 | logger.error(f"Failed to close {app}: {e}") 61 | speak(f"Failed to close {app}") 62 | return 63 | speak("Application not recognized.") -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | 5 | # Load environment variables from .env file 6 | load_dotenv() 7 | 8 | # Project directories 9 | BASE_DIR = Path(__file__).resolve().parent 10 | DATA_DIR = BASE_DIR / "data" 11 | 12 | # API keys (load from environment variables) 13 | WOLFRAM_ALPHA_KEY = os.getenv("WOLFRAM_ALPHA_KEY", "3WR2KP-4U9X329R9U") 14 | NEWS_API_KEY = os.getenv("NEWS_API_KEY", "1a1ac9b8505341478289bc11a04eb056") 15 | EMAIL_USER = os.getenv("EMAIL_USER", "userid@gmail.com") 16 | EMAIL_PASS = os.getenv("EMAIL_PASS", "your_password") 17 | 18 | # Voice settings 19 | VOICE_ID = 0 20 | SPEECH_RATE = 170 21 | 22 | # File paths 23 | ALARM_FILE = DATA_DIR / "alarm.txt" 24 | NOTES_FILE = DATA_DIR / "notes.txt" 25 | REMEMBER_FILE = DATA_DIR / "remember.txt" 26 | 27 | # Application mappings 28 | APP_MAPPINGS = { 29 | "commandprompt": "cmd", 30 | "paint": "paint", 31 | "word": "winword", 32 | "excel": "excel", 33 | "chrome": "chrome", 34 | "vscode": "code", 35 | "powerpoint": "powerpnt", 36 | "youtube music": r"C:\Users\Rohan S Mistry\AppData\Local\Programs\youtube-music-desktop-app\YouTube Music Desktop App.exe", 37 | "vs code": r"C:\Users\Rohan S Mistry\AppData\Local\Programs\Microsoft VS Code\Code.exe" 38 | } 39 | 40 | # Website mappings 41 | WEBSITE_MAPPINGS = { 42 | "google": "google.com", 43 | "youtube": "youtube.com", 44 | "facebook": "facebook.com", 45 | "twitter": "twitter.com", 46 | "wikipedia": "wikipedia.com", 47 | "instagram": "instagram.com", 48 | "baidu": "baidu.com", 49 | "yahoo": "yahoo.com", 50 | "yandex": "yandex.ru", 51 | "whatsapp": "whatsapp.com", 52 | "amazon": "amazon.com", 53 | "zoom": "zoom.us", 54 | "live": "live.com", 55 | "netflix": "netflix.com", 56 | "yahoo japan": "yahoo.co.jp", 57 | "vk": "vk.com", 58 | "reddit": "reddit.com", 59 | "office": "office.com", 60 | "naver": "naver.com", 61 | "pinterest": "pinterest.com", 62 | "discord": "discord.com", 63 | "linkedin": "linkedin.com", 64 | "cnn": "cnn.com", 65 | "microsoft": "microsoft.com", 66 | "mail": "mail.ru", 67 | "globo": "globo.com", 68 | "bing": "bing.com", 69 | "twitch": "twitch.tv", 70 | "google brazil": "google.com.br", 71 | "qq": "qq.com", 72 | "microsoft online": "microsoftonline.com", 73 | "ebay": "ebay.com", 74 | "msn": "msn.com", 75 | "yahoo news japan": "news.yahoo.co.jp", 76 | "duckduckgo": "duckduckgo.com", 77 | "ok": "ok.ru", 78 | "walmart": "walmart.com", 79 | "bilibili": "bilibili.com", 80 | "tiktok": "tiktok.com", 81 | "paypal": "paypal.com", 82 | "google germany": "google.de", 83 | "amazon japan": "amazon.co.jp", 84 | "aliexpress": "aliexpress.com", 85 | "amazon germany": "amazon.de", 86 | "rakuten japan": "rakuten.co.jp", 87 | "amazon uk": "amazon.co.uk" 88 | } 89 | 90 | def speak(text: str) -> None: 91 | """Speak the provided text using the text-to-speech engine.""" 92 | import pyttsx3 93 | engine = pyttsx3.init("sapi5") 94 | voices = engine.getProperty("voices") 95 | engine.setProperty("voice", voices[VOICE_ID].id) 96 | engine.setProperty("rate", SPEECH_RATE) 97 | try: 98 | engine.say(text) 99 | engine.runAndWait() 100 | except Exception as e: 101 | import logging 102 | logging.error(f"Speech error: {e}") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OWN 1.0 - Personal Voice Assistant 2 | 3 | ![OWN 1.0 Logo](https://via.placeholder.com/150?text=OWN+1.0) 4 | 5 | OWN 1.0 is a modular, voice-activated personal assistant built in Python, designed to streamline daily tasks through natural language interaction. Inspired by fictional assistants like JARVIS, OWN 1.0 leverages speech recognition, NLP, and API integrations to perform tasks such as opening applications, searching the web, sending emails, setting alarms, and more. This project showcases modern software engineering practices, including modular design, error handling, and NLP integration, making it a standout portfolio piece for an AI/ML fresher. 6 | 7 | ## Features 8 | 9 | - **Voice Interaction**: Uses `pyttsx3` for text-to-speech and `SpeechRecognition` for voice input, enabling seamless user interaction. 10 | - **Natural Language Processing**: Integrates `spaCy` for intent recognition, improving command understanding. 11 | - **Web and App Control**: Opens websites (e.g., Google, YouTube, Netflix) and applications (e.g., VS Code, Notepad) with voice commands. 12 | - **Search Capabilities**: Performs Google, YouTube, and Wikipedia searches using `pywhatkit` and `wikipedia` APIs. 13 | - **Email and Messaging**: Sends emails via Gmail SMTP and WhatsApp messages using `pywhatkit`. 14 | - **System Control**: Manages system volume, locks the device, shuts down, and clears the recycle bin. 15 | - **Productivity Tools**: Sets alarms, takes notes, and remembers user inputs stored in text files. 16 | - **News Updates**: Fetches and reads latest news from various categories using the News API. 17 | - **Mathematical Calculations**: Solves math queries using Wolfram Alpha API. 18 | - **Keyboard Shortcuts**: Executes browser shortcuts (e.g., copy, paste, pause) via `pyautogui`. 19 | - **Extensibility**: Modular architecture allows easy addition of new features. 20 | 21 | ## Project Structure 22 | 23 | ``` 24 | voice_assistant/ 25 | ├── config.py # Configuration and environment variable management 26 | ├── app.py # Main application logic 27 | ├── modules/ # Modular feature implementations 28 | │ ├── chrome_shortcut.py # Browser shortcut handling 29 | │ ├── search.py # Web search functionalities 30 | │ ├── apps.py # Application and website launching 31 | │ ├── keyboard.py # System keyboard controls 32 | │ ├── news.py # News fetching and reading 33 | │ ├── calculator.py # Mathematical calculations 34 | │ ├── whatsapp.py # WhatsApp messaging 35 | │ ├── alarm.py # Alarm setting and ringing 36 | ├── requirements.txt # Project dependencies 37 | ├── README.md # Project documentation 38 | ``` 39 | 40 | ## Installation 41 | 42 | ### Prerequisites 43 | - Python 3.8+ 44 | - A microphone for voice input 45 | - Windows OS (due to `pyttsx3` SAPI5 and system-specific commands) 46 | - API keys for Wolfram Alpha and News API 47 | - Gmail account with app password for email functionality 48 | 49 | ### Steps 50 | 1. **Clone the Repository**: 51 | ```bash 52 | git clone https://github.com/rohanmistry231/voice-assistant.git 53 | cd voice-assistant 54 | ``` 55 | 56 | 2. **Set Up a Virtual Environment** (recommended): 57 | ```bash 58 | python -m venv venv 59 | source venv/bin/activate # On Windows: venv\Scripts\activate 60 | ``` 61 | 62 | 3. **Install Dependencies**: 63 | ```bash 64 | pip install -r requirements.txt 65 | ``` 66 | 67 | 4. **Install spaCy Model**: 68 | ```bash 69 | python -m spacy download en_core_web_sm 70 | ``` 71 | 72 | 5. **Configure Environment Variables**: 73 | Create a `.env` file in the project root with the following: 74 | ```env 75 | WOLFRAM_ALPHA_KEY=your_wolfram_alpha_key 76 | NEWS_API_KEY=your_news_api_key 77 | EMAIL_USER=your_email@gmail.com 78 | EMAIL_PASS=your_gmail_app_password 79 | ``` 80 | - Obtain API keys from [Wolfram Alpha](https://developer.wolframalpha.com/) and [News API](https://newsapi.org/). 81 | - Generate a Gmail app password at [Google Account Settings](https://myaccount.google.com/security). 82 | 83 | 6. **Ensure Music File for Alarms**: 84 | Place a `music.mp3` file in the project root or update `alarm.py` to point to an existing audio file. 85 | 86 | 7. **Run the Assistant**: 87 | ```bash 88 | python app.py 89 | ``` 90 | 91 | ## Usage 92 | 93 | 1. **Start the Assistant**: Run `app.py`. The assistant greets you based on the time of day and asks for your name. 94 | 2. **Issue Voice Commands**: Speak commands like: 95 | - "Open YouTube" 96 | - "Search Google for Python tutorials" 97 | - "Send an email to mama" 98 | - "Set an alarm for 10 and 30" 99 | - "Tell me a joke" 100 | - "What's the time?" 101 | - "Volume up" 102 | - "Show notes" 103 | 3. **Exit**: Say "exit" to stop the assistant. 104 | 105 | ### Example Commands 106 | | Command | Action | 107 | |---------|--------| 108 | | "Open Google" | Opens google.com in the default browser | 109 | | "Search Wikipedia for AI" | Reads a Wikipedia summary about AI | 110 | | "Play song" | Plays an MP3 file from the music directory | 111 | | "Email to user" | Prompts for email content and recipient | 112 | | "Calculate 2 plus 2" | Solves the math query using Wolfram Alpha | 113 | | "News" | Fetches latest news in a chosen category | 114 | | "Remember that meeting at 5 PM" | Saves the reminder to a file | 115 | 116 | ## Technical Details 117 | 118 | ### Technologies Used 119 | - **Python Libraries**: 120 | - `pyttsx3`: Text-to-speech for voice output 121 | - `SpeechRecognition`: Voice input via Google Speech API 122 | - `spaCy`: NLP for intent recognition 123 | - `pywhatkit`: YouTube and WhatsApp automation 124 | - `wikipedia`: Wikipedia summaries 125 | - `requests`: API calls for news 126 | - `wolframalpha`: Mathematical calculations 127 | - `pyautogui`: Keyboard and mouse automation 128 | - `pynput`: System volume control 129 | - `python-dotenv`: Environment variable management 130 | - **APIs**: 131 | - Wolfram Alpha API 132 | - News API 133 | - **Other**: 134 | - Modular architecture with separate modules for each feature 135 | - Logging for debugging and error tracking 136 | - Environment variables for secure configuration 137 | 138 | ### Key Features 139 | - **Modular Design**: Each feature (e.g., search, apps, news) is encapsulated in its own module, enhancing maintainability. 140 | - **NLP Integration**: `spaCy` processes user queries to identify intents (e.g., "open", "search"), improving command accuracy. 141 | - **Error Handling**: Comprehensive try-except blocks and logging ensure robustness. 142 | - **Security**: Sensitive data (API keys, email credentials) are stored in a `.env` file. 143 | - **Performance**: Optimized speech recognition with ambient noise adjustment and timeouts. 144 | 145 | ## Development Highlights 146 | 147 | This project demonstrates skills critical for an AI/ML fresher: 148 | - **AI/ML**: Integration of NLP with `spaCy` for intent recognition, showcasing understanding of natural language processing. 149 | - **Software Engineering**: Modular codebase, logging, and environment variable management reflect professional development practices. 150 | - **API Integration**: Seamless use of external APIs (Wolfram Alpha, News API) for enhanced functionality. 151 | - **Automation**: System control via `pyautogui` and `pynput` demonstrates automation capabilities. 152 | - **User Experience**: Context-aware greetings and clear voice feedback enhance usability. 153 | 154 | ## Future Enhancements 155 | 156 | - **GUI Interface**: Add a Tkinter or PyQt interface for visual interaction. 157 | - **Advanced NLP**: Integrate a transformer model (e.g., BERT) for better intent recognition. 158 | - **Wake Word Detection**: Implement wake-word activation (e.g., "Hey OWN") using Porcupine. 159 | - **Cross-Platform Support**: Adapt for Linux/Mac by replacing Windows-specific commands. 160 | - **Cloud Integration**: Store notes and reminders in a cloud database (e.g., Firebase). 161 | - **Multi-Language Support**: Add support for non-English voice commands. 162 | 163 | ## Contributing 164 | 165 | Contributions are welcome! To contribute: 166 | 1. Fork the repository. 167 | 2. Create a new branch (`git checkout -b feature/your-feature`). 168 | 3. Commit your changes (`git commit -m "Add your feature"`). 169 | 4. Push to the branch (`git push origin feature/your-feature`). 170 | 5. Open a pull request. 171 | 172 | Please ensure your code follows PEP 8 guidelines and includes appropriate tests. 173 | 174 | ## Acknowledgments 175 | 176 | - Inspired by fictional assistants like JARVIS from Iron Man. 177 | - Thanks to the open-source community for libraries like `pyttsx3`, `SpeechRecognition`, and `spaCy`. 178 | - Built as a portfolio project to demonstrate AI/ML and software engineering skills. 179 | 180 | ## Contact 181 | 182 | - **Author**: Rohan S. Mistry 183 | - **Email**: rohanmistry231@gmail.com 184 | - **LinkedIn**: [linkedin](https://www.linkedin.com/in/rohan-mistry-493987202/) 185 | 186 | Feel free to reach out for collaboration or feedback! 187 | 188 | --- 189 | 190 | *Built with ❤️ by Rohan S. Mistry* -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import pyttsx3 2 | import speech_recognition as sr 3 | import datetime 4 | from pathlib import Path 5 | import os 6 | import logging 7 | import shutil 8 | from modules import chrome_shortcut, search, apps, keyboard, news, calculator, whatsapp, alarm 9 | from config import VOICE_ID, SPEECH_RATE, ALARM_FILE, NOTES_FILE, REMEMBER_FILE, EMAIL_USER, EMAIL_PASS, APP_MAPPINGS, WEBSITE_MAPPINGS 10 | 11 | # Setup logging 12 | logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") 13 | logger = logging.getLogger(__name__) 14 | 15 | # Initialize text-to-speech engine 16 | engine = None 17 | voices = None 18 | try: 19 | engine = pyttsx3.init("sapi5") 20 | voices = engine.getProperty("voices") 21 | engine.setProperty("voice", voices[VOICE_ID].id) 22 | engine.setProperty("rate", SPEECH_RATE) 23 | except Exception as e: 24 | logger.error(f"Failed to initialize text-to-speech engine: {e}") 25 | 26 | def speak(text: str) -> None: 27 | """Speak the provided text using the text-to-speech engine.""" 28 | global engine, voices 29 | if engine is None or voices is None: 30 | logger.warning("Text-to-speech engine not initialized. Attempting to initialize...") 31 | try: 32 | engine = pyttsx3.init("sapi5") 33 | voices = engine.getProperty("voices") 34 | engine.setProperty("voice", voices[VOICE_ID].id) 35 | engine.setProperty("rate", SPEECH_RATE) 36 | except Exception as e: 37 | logger.error(f"Failed to initialize text-to-speech engine: {e}") 38 | return 39 | 40 | try: 41 | engine.say(text) 42 | engine.runAndWait() 43 | except TypeError as te: 44 | logger.error(f"TypeError in speech (likely comtypes issue): {te}") 45 | # Attempt to reinitialize the engine 46 | try: 47 | engine = pyttsx3.init("sapi5") 48 | voices = engine.getProperty("voices") 49 | engine.setProperty("voice", voices[VOICE_ID].id) 50 | engine.setProperty("rate", SPEECH_RATE) 51 | engine.say(text) 52 | engine.runAndWait() 53 | except Exception as e2: 54 | logger.error(f"Failed to reinitialize speech engine after TypeError: {e2}") 55 | except Exception as e: 56 | logger.error(f"Speech error: {e}") 57 | # Attempt to reinitialize the engine 58 | try: 59 | engine = pyttsx3.init("sapi5") 60 | voices = engine.getProperty("voices") 61 | engine.setProperty("voice", voices[VOICE_ID].id) 62 | engine.setProperty("rate", SPEECH_RATE) 63 | engine.say(text) 64 | engine.runAndWait() 65 | except Exception as e2: 66 | logger.error(f"Failed to reinitialize speech engine: {e2}") 67 | 68 | def take_command() -> str: 69 | """Capture voice input and convert to text.""" 70 | r = sr.Recognizer() 71 | with sr.Microphone() as source: 72 | logger.info("Listening...") 73 | r.adjust_for_ambient_noise(source, duration=0.1) 74 | r.energy_threshold = 300 75 | r.pause_threshold = 1 76 | try: 77 | audio = r.listen(source, timeout=5, phrase_time_limit=4) 78 | logger.info("Recognizing...") 79 | query = r.recognize_google(audio, language="en-in").lower() 80 | logger.info(f"User said: {query}") 81 | return query 82 | except sr.WaitTimeoutError: 83 | logger.warning("No speech detected.") 84 | return "none" 85 | except sr.UnknownValueError: 86 | logger.warning("Could not understand audio.") 87 | return "none" 88 | except Exception as e: 89 | logger.error(f"Recognition error: {e}") 90 | return "none" 91 | 92 | def wish_user() -> None: 93 | """Greet the user based on the time of day.""" 94 | hour = datetime.datetime.now().hour 95 | greetings = { 96 | (0, 12): "Good Morning", 97 | (12, 18): "Good Afternoon", 98 | (18, 24): "Good Evening" 99 | } 100 | greeting = next(g for h, g in greetings.items() if h[0] <= hour < h[1]) 101 | speak(f"{greeting} Sir! I am your Assistant, OWN 1.0.") 102 | 103 | def get_username() -> str: 104 | """Prompt for and return the user's name.""" 105 | speak("What should I call you, sir?") 106 | uname = take_command() 107 | if uname != "none": 108 | speak(f"Welcome, Mister {uname}") 109 | print(f"{'#'*20}\nWelcome Mr. {uname}\n{'#'*20}".center(shutil.get_terminal_size().columns)) 110 | return uname 111 | return "User" 112 | 113 | def send_email(to: str, content: str) -> bool: 114 | """Send an email to the specified recipient.""" 115 | try: 116 | import smtplib 117 | server = smtplib.SMTP("smtp.gmail.com", 587) 118 | server.ehlo() 119 | server.starttls() 120 | server.login(EMAIL_USER, EMAIL_PASS) 121 | server.sendmail(EMAIL_USER, to, content) 122 | server.close() 123 | return True 124 | except Exception as e: 125 | logger.error(f"Email error: {e}") 126 | return False 127 | 128 | def detect_intent(query: str) -> str: 129 | """Detect the intent of the query using keyword matching.""" 130 | query = query.lower() 131 | if any(word in query for word in ["open", "launch"]): 132 | return "open" 133 | elif any(word in query for word in ["close", "exit"]): 134 | return "close" 135 | elif any(word in query for word in ["search", "find", "google", "youtube", "wikipedia", "what is", "who is"]): 136 | return "search" 137 | elif any(word in query for word in ["play", "music", "song"]): 138 | return "play" 139 | elif any(word in query for word in ["email", "mail"]): 140 | return "email" 141 | elif any(word in query for word in ["joke", "funny"]): 142 | return "joke" 143 | return "unknown" 144 | 145 | def process_command(query: str, username: str) -> bool: 146 | """Process the user's voice command.""" 147 | intent = detect_intent(query) 148 | 149 | if intent == "open" or "open" in query: 150 | # Handle both apps and websites 151 | for app in APP_MAPPINGS: 152 | if app in query: 153 | apps.open_app(query) 154 | return True 155 | for site in WEBSITE_MAPPINGS: 156 | if site in query: 157 | apps.open_app(query) 158 | return True 159 | elif intent == "close" or "close" in query: 160 | apps.close_app(query) 161 | return True 162 | elif intent == "search": 163 | search.search_query(query) 164 | return True 165 | elif intent == "play" or "play song" in query: 166 | music_dir = Path("C:/Users/Rohan S Mistry/Music") 167 | songs = list(music_dir.glob("*.mp3")) 168 | if songs: 169 | os.startfile(songs[0]) 170 | speak("Playing music.") 171 | else: 172 | speak("No music files found.") 173 | return True 174 | elif intent == "email" or "email" in query: 175 | speak("What should I say?") 176 | content = take_command() 177 | if "mama" in query: 178 | to = "pritesh_02@yahoo.com" 179 | else: 180 | speak("To whom should I send the email?") 181 | to = take_command().replace(" ", "") 182 | if send_email(to, content): 183 | speak("Email sent successfully.") 184 | else: 185 | speak("Failed to send email.") 186 | return True 187 | elif intent == "joke" or "joke" in query: 188 | import pyjokes 189 | speak(pyjokes.get_joke()) 190 | return True 191 | elif "time" in query: 192 | current_time = datetime.datetime.now().strftime("%I:%M %p") 193 | speak(f"The time is {current_time}") 194 | return True 195 | elif "exit" in query: 196 | speak(f"Goodbye, {username}. Thanks for using OWN 1.0.") 197 | return False 198 | elif "note" in query: 199 | if "write" in query: 200 | speak("What should I write?") 201 | note = take_command() 202 | with NOTES_FILE.open("a") as f: 203 | f.write(f"{datetime.datetime.now().strftime('%I:%M %p')}: {note}\n") 204 | speak("Note saved.") 205 | elif "show" in query: 206 | with NOTES_FILE.open("r") as f: 207 | content = f.read() 208 | print(content) 209 | speak(content[:100]) # Read first 100 chars 210 | return True 211 | elif "remember" in query: 212 | if "what do you remember" in query: 213 | with REMEMBER_FILE.open("r") as f: 214 | speak(f"You told me to remember: {f.read()}") 215 | else: 216 | message = query.replace("remember that", "").strip() 217 | with REMEMBER_FILE.open("a") as f: 218 | f.write(f"{message}\n") 219 | speak(f"I'll remember: {message}") 220 | return True 221 | elif "alarm" in query: 222 | speak("Please tell the time for the alarm (e.g., 10 and 30)") 223 | time_input = take_command().replace(" and ", ":") 224 | alarm.set_alarm(time_input) 225 | return True 226 | elif "news" in query: 227 | news.get_latest_news() 228 | return True 229 | elif "whatsapp" in query: 230 | whatsapp.send_whatsapp_message() 231 | return True 232 | elif "calculate" in query: 233 | calculator.calculate(query) 234 | return True 235 | elif any(cmd in query for cmd in ["select all", "copy", "cut", "paste", "pause", "resume", "forward", "backward", "change window"]): 236 | chrome_shortcut.handle_shortcut(query) 237 | return True 238 | elif "volume up" in query: 239 | keyboard.volume_up() 240 | return True 241 | elif "volume down" in query: 242 | keyboard.volume_down() 243 | return True 244 | else: 245 | speak("Sorry, I didn't understand that command.") 246 | return True 247 | 248 | def main(): 249 | """Main function to run the voice assistant.""" 250 | wish_user() 251 | username = get_username() 252 | speak("How can I help you, sir?") 253 | 254 | while True: 255 | query = take_command() 256 | if query == "none": 257 | continue 258 | if not process_command(query, username): 259 | break 260 | 261 | if __name__ == "__main__": 262 | main() --------------------------------------------------------------------------------