├── modules
    ├── __init__.py
    ├── calculator.py
    ├── keyboard.py
    ├── chrome_shortcut.py
    ├── alarm.py
    ├── Whatsapp.py
    ├── news.py
    ├── search.py
    └── apps.py
├── .gitignore
├── requirements.txt
├── LICENSE
├── config.py
├── README.md
└── app.py


/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore virtual environment folder
2 | venv/
3 | 
4 | # Ignore Python bytecode files
5 | *.pyc
6 | __pycache__
7 | 
8 | # Environment variables file
9 | .env


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | pyttsx3==2.90
 2 | SpeechRecognition==3.10.0
 3 | spacy==3.7.2
 4 | pywhatkit==5.4
 5 | wikipedia==1.4.0
 6 | pyautogui==0.9.54
 7 | pynput==1.7.6
 8 | requests==2.31.0
 9 | pyjokes==0.6.0
10 | python-dotenv==1.0.0
11 | wolframalpha==5.0.0
12 | pillow>=10.4.0
13 | # NOTE: no separate "dotenv" pin is needed; python-dotenv (line 10) provides the `dotenv` import
14 | pyjokes==0.6.0
15 | pywhatkit==5.4
16 | wolframalpha==5.0.0


--------------------------------------------------------------------------------
/modules/calculator.py:
--------------------------------------------------------------------------------
 1 | import wolframalpha
 2 | from config import speak, WOLFRAM_ALPHA_KEY
 3 | 
 4 | def calculate(query: str) -> None:
 5 |     """Perform mathematical calculations using Wolfram Alpha."""
 6 |     query = query.replace("calculate", "").replace("jarvis", "").strip()
 7 |     query = query.replace("multiply", "*").replace("plus", "+").replace("minus", "-").replace("divide", "/")
 8 |     try:
 9 |         client = wolframalpha.Client(WOLFRAM_ALPHA_KEY)
10 |         result = next(client.query(query).results).text
11 |         speak(f"The answer is {result}")
12 |         print(result)
13 |     except:
14 |         speak("Sorry, I couldn't calculate that.")


--------------------------------------------------------------------------------
/modules/keyboard.py:
--------------------------------------------------------------------------------
 1 | from pynput.keyboard import Key, Controller
 2 | from time import sleep
 3 | from config import speak
 4 | 
 5 | keyboard = Controller()
 6 | 
 7 | def volume_up() -> None:
 8 |     """Increase system volume."""
 9 |     for _ in range(5):
10 |         keyboard.press(Key.media_volume_up)
11 |         keyboard.release(Key.media_volume_up)
12 |         sleep(0.1)
13 |     speak("Volume increased.")
14 | 
15 | def volume_down() -> None:
16 |     """Decrease system volume."""
17 |     for _ in range(5):
18 |         keyboard.press(Key.media_volume_down)
19 |         keyboard.release(Key.media_volume_down)
20 |         sleep(0.1)
21 |     speak("Volume decreased.")


--------------------------------------------------------------------------------
/modules/chrome_shortcut.py:
--------------------------------------------------------------------------------
 1 | import pyautogui
 2 | from config import speak
 3 | 
 4 | def handle_shortcut(query: str) -> None:
 5 |     """Handle keyboard shortcuts for browser actions."""
 6 |     shortcuts = {
 7 |         "select all": ("ctrl", "a"),
 8 |         "copy": ("ctrl", "c"),
 9 |         "cut": ("ctrl", "x"),
10 |         "paste": ("ctrl", "v"),
11 |         "pause": "space",
12 |         "resume": "space",
13 |         "forward": "right",
14 |         "backward": "left",
15 |         "change window": ("alt", "tab")
16 |     }
17 |     for cmd, keys in shortcuts.items():
18 |         if cmd in query:
19 |             if isinstance(keys, tuple):
20 |                 pyautogui.hotkey(*keys)
21 |             else:
22 |                 pyautogui.press(keys)
23 |             speak(f"{cmd.capitalize()} executed.")
24 |             break


--------------------------------------------------------------------------------
/modules/alarm.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import os
 3 | from config import speak, ALARM_FILE
 4 | 
 5 | def set_alarm(time_str: str) -> None:
 6 |     """Set an alarm for the specified time."""
 7 |     with ALARM_FILE.open("a") as f:
 8 |         f.write(f"{time_str}\n")
 9 |     speak("Alarm set.")
10 |     ring(time_str)
11 | 
def ring(time_str: str) -> None:
    """Block until the wall clock reaches the alarm time, then ring.

    The spoken form "H and M" is converted to "H:M" and each numeric part
    is zero-padded so it can be compared against the ``%H:%M:%S`` clock
    string. When the time is reached an announcement is spoken,
    ``music.mp3`` is opened, and the alarm file is cleared.

    Args:
        time_str: The alarm time, e.g. "10 and 30" or "10:30".
    """
    import time  # local import: this module's import block has no ``time``

    alarm_time = time_str.replace(" and ", ":").strip()

    # "9:5" would never match the zero-padded clock string "09:05:..".
    parts = alarm_time.split(":")
    if all(part.isdigit() for part in parts):
        alarm_time = ":".join(part.zfill(2) for part in parts)

    while True:
        current_time = datetime.datetime.now().strftime("%H:%M:%S")
        if current_time.startswith(alarm_time):
            speak("Alarm ringing, sir!")
            os.startfile("music.mp3")  # Ensure music.mp3 exists
            break
        # datetime objects have no sleep(); pause with time.sleep instead.
        time.sleep(1)

    # Clear the stored alarm(s) now that it has fired.
    ALARM_FILE.parent.mkdir(parents=True, exist_ok=True)
    with ALARM_FILE.open("w") as f:
        f.write("")


--------------------------------------------------------------------------------
/modules/Whatsapp.py:
--------------------------------------------------------------------------------
 1 | import pywhatkit
 2 | from datetime import datetime, timedelta
 3 | from config import speak
 4 | 
 5 | def send_whatsapp_message() -> None:
 6 |     """Send a WhatsApp message."""
 7 |     speak("Who do you want to message?")
 8 |     recipient = input("Enter recipient number (e.g., +919925336945) or name (Person 1, Person 2): ")
 9 |     contacts = {
10 |         "person 1": "+919925336945",
11 |         "person 2": "+91xxxxxxxxxx"  # Add another contact
12 |     }
13 |     phone = contacts.get(recipient.lower(), recipient)
14 |     speak("What's the message?")
15 |     message = input("Enter the message: ")
16 |     time_hour = int(datetime.now().strftime("%H"))
17 |     time_min = int((datetime.now() + timedelta(minutes=2)).strftime("%M"))
18 |     try:
19 |         pywhatkit.sendwhatmsg(phone, message, time_hour, time_min)
20 |         speak("Message scheduled.")
21 |     except Exception as e:
22 |         speak("Failed to send message.")
23 |         print(e)


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License  
 2 | 
 3 | Copyright (c) 2025 rohanmistry231
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy  
 6 | of this software and associated documentation files (the "Software"), to deal  
 7 | in the Software without restriction, including without limitation the rights  
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  
 9 | copies of the Software, and to permit persons to whom the Software is  
10 | furnished to do so, subject to the following conditions:  
11 | 
12 | The above copyright notice and this permission notice shall be included in all  
13 | copies or substantial portions of the Software.  
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/modules/news.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import json
 3 | from config import speak, NEWS_API_KEY
 4 | 
 5 | def get_latest_news() -> None:
 6 |     """Fetch and read the latest news."""
 7 |     api_urls = {
 8 |         "business": f"https://newsapi.org/v2/top-headlines?country=in&category=business&apiKey={NEWS_API_KEY}",
 9 |         "entertainment": f"https://newsapi.org/v2/top-headlines?country=in&category=entertainment&apiKey={NEWS_API_KEY}",
10 |         "health": f"https://newsapi.org/v2/top-headlines?country=in&category=health&apiKey={NEWS_API_KEY}",
11 |         "science": f"https://newsapi.org/v2/top-headlines?country=in&category=science&apiKey={NEWS_API_KEY}",
12 |         "sports": f"https://newsapi.org/v2/top-headlines?country=in&category=sports&apiKey={NEWS_API_KEY}",
13 |         "technology": f"https://newsapi.org/v2/top-headlines?country=in&category=technology&apiKey={NEWS_API_KEY}"
14 |     }
15 |     speak("Which news category? Business, health, technology, sports, entertainment, or science?")
16 |     category = input("Type the category: ").lower()
17 |     url = api_urls.get(category)
18 |     if not url:
19 |         speak("Invalid category.")
20 |         return
21 | 
22 |     try:
23 |         response = requests.get(url)
24 |         news = json.loads(response.text)
25 |         speak("Here are the latest headlines.")
26 |         for article in news["articles"][:3]:  # Limit to 3 articles
27 |             title = article["title"]
28 |             print(title)
29 |             speak(title)
30 |             print(f"More info: {article['url']}")
31 |             if input("Continue? (y/n): ").lower() != "y":
32 |                 break
33 |         speak("That's all for now.")
34 |     except Exception as e:
35 |         speak("Failed to fetch news.")
36 |         print(e)


--------------------------------------------------------------------------------
/modules/search.py:
--------------------------------------------------------------------------------
 1 | import pywhatkit
 2 | import wikipedia
 3 | import webbrowser
 4 | from config import speak
 5 | import logging
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | def search_query(query: str) -> None:
10 |     """Handle search queries for Google, YouTube, or Wikipedia."""
11 |     query = query.lower().strip()
12 |     
13 |     if "google" in query:
14 |         query = query.replace("google search", "").replace("google", "").strip()
15 |         speak("Searching Google...")
16 |         try:
17 |             pywhatkit.search(query)
18 |             result = wikipedia.summary(query, sentences=1, auto_suggest=False)
19 |             speak(result)
20 |         except Exception as e:
21 |             logger.error(f"Google search error: {e}")
22 |             speak("No additional information available.")
23 |     
24 |     elif "youtube" in query:
25 |         query = query.replace("youtube search", "").replace("youtube", "").strip()
26 |         speak("Searching YouTube...")
27 |         try:
28 |             webbrowser.open(f"https://www.youtube.com/results?search_query={query}")
29 |             pywhatkit.playonyt(query)
30 |         except Exception as e:
31 |             logger.error(f"YouTube search error: {e}")
32 |             speak("Failed to search YouTube.")
33 |     
34 |     elif "wikipedia" in query or "what is" in query or "who is" in query:
35 |         query = query.replace("wikipedia", "").replace("search wikipedia", "").replace("what is", "").replace("who is", "").strip()
36 |         speak("Searching Wikipedia...")
37 |         try:
38 |             results = wikipedia.summary(query, sentences=2, auto_suggest=False)
39 |             print(results)
40 |             speak(results)
41 |         except Exception as e:
42 |             logger.error(f"Wikipedia search error: {e}")
43 |             speak("No results found on Wikipedia.")


--------------------------------------------------------------------------------
/modules/apps.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pyautogui
 3 | import webbrowser
 4 | from time import sleep
 5 | from config import speak, APP_MAPPINGS, WEBSITE_MAPPINGS
 6 | import logging
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
def _longest_name_match(query: str, mapping: dict):
    """Return the ``(name, target)`` entry whose name is the longest whole-word match in *query*.

    Returns ``None`` when no name from *mapping* occurs in *query*.
    """
    import re  # local import: this module's import block has no ``re``

    hits = [
        (name, target)
        for name, target in mapping.items()
        # Whole-word match so "ok" does not fire inside "tiktok".
        if re.search(rf"(?<!\w){re.escape(name)}(?!\w)", query)
    ]
    return max(hits, key=lambda hit: len(hit[0]), default=None)


def open_app(query: str) -> None:
    """Open the website or application named in *query*.

    The words "open" and "hey" are removed, then the query is matched
    against ``WEBSITE_MAPPINGS`` and ``APP_MAPPINGS``. The longest matching
    name wins, so "youtube music" beats "youtube" and "google brazil"
    beats "google"; a website wins a tie. If nothing matches and the query
    looks like a domain, it is opened as ``https://www.<query>``.

    Args:
        query: The recognized command text, e.g. "open youtube".
    """
    speak("Launching...")
    query = query.lower().replace("open", "").replace("hey", "").strip()

    site_hit = _longest_name_match(query, WEBSITE_MAPPINGS)
    app_hit = _longest_name_match(query, APP_MAPPINGS)

    # Check for website (preferred unless an application name is more specific)
    if site_hit and (app_hit is None or len(site_hit[0]) >= len(app_hit[0])):
        site, url = site_hit
        webbrowser.open(f"https://{url}")
        speak(f"Opening {site}")
        return

    # Check for application
    if app_hit:
        app, path = app_hit
        try:
            if path.endswith(".exe"):
                os.startfile(path)
            elif os.system(f"start {path}") != 0:
                # os.system reports failure via its return value, not by raising.
                raise OSError(f"'start {path}' exited with a non-zero status")
            speak(f"Opening {app}")
        except Exception as e:
            logger.error(f"Failed to open {app}: {e}")
            speak(f"Failed to open {app}")
        return

    # Fallback for generic website
    if any(x in query for x in [".com", ".co.in", ".org"]):
        webbrowser.open(f"https://www.{query}")
        speak(f"Opening {query}")
    else:
        speak("Application or website not recognized.")
42 | 
def close_app(query: str) -> None:
    """Close browser tabs or a mapped application.

    If the query mentions "tab", Ctrl+W is sent once per requested tab;
    the count is the number directly before "tab"/"tabs", defaulting to 1.
    Otherwise the first ``APP_MAPPINGS`` entry named in the query whose
    target is not an ``.exe`` path is terminated with ``taskkill``.

    Args:
        query: The recognized command text, e.g. "close 3 tabs".
    """
    import re          # local imports: this module's import block has
    import subprocess  # neither ``re`` nor ``subprocess``

    speak("Closing...")
    query = query.lower()
    if "tab" in query:
        # Find the number anywhere in the query ("close 3 tabs"), not only
        # when the query begins with it.
        count_match = re.search(r"(\d+)\s*tabs?", query)
        num_tabs = int(count_match.group(1)) if count_match else 1
        for _ in range(num_tabs):
            pyautogui.hotkey("ctrl", "w")
            sleep(0.5)
        speak("Tabs closed.")
    else:
        for app, path in APP_MAPPINGS.items():
            if app in query and not path.endswith(".exe"):
                try:
                    # check=True turns a non-zero taskkill exit status into
                    # an exception, so success is only announced when the
                    # process was actually killed.
                    subprocess.run(["taskkill", "/f", "/im", f"{path}.exe"], check=True)
                    speak(f"Closed {app}")
                except Exception as e:
                    logger.error(f"Failed to close {app}: {e}")
                    speak(f"Failed to close {app}")
                return
        speak("Application not recognized.")


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | from dotenv import load_dotenv
  4 | 
  5 | # Load environment variables from .env file
  6 | load_dotenv()
  7 | 
  8 | # Project directories
  9 | BASE_DIR = Path(__file__).resolve().parent
 10 | DATA_DIR = BASE_DIR / "data"
 11 | 
 12 | # API keys (load from environment variables)
 13 | WOLFRAM_ALPHA_KEY = os.getenv("WOLFRAM_ALPHA_KEY", "3WR2KP-4U9X329R9U")
 14 | NEWS_API_KEY = os.getenv("NEWS_API_KEY", "1a1ac9b8505341478289bc11a04eb056")
 15 | EMAIL_USER = os.getenv("EMAIL_USER", "userid@gmail.com")
 16 | EMAIL_PASS = os.getenv("EMAIL_PASS", "your_password")
 17 | 
 18 | # Voice settings
 19 | VOICE_ID = 0
 20 | SPEECH_RATE = 170
 21 | 
 22 | # File paths
 23 | ALARM_FILE = DATA_DIR / "alarm.txt"
 24 | NOTES_FILE = DATA_DIR / "notes.txt"
 25 | REMEMBER_FILE = DATA_DIR / "remember.txt"
 26 | 
# Application mappings: spoken application name -> launch target.
# Values ending in ".exe" are full paths opened directly by modules/apps.py;
# the other values are short command names passed to the Windows `start`
# command (and, with ".exe" appended, to `taskkill` when closing).
# NOTE(review): the two absolute paths below are specific to one user's
# machine and must be adjusted per installation.
APP_MAPPINGS = {
    "commandprompt": "cmd",
    "paint": "paint",
    "word": "winword",
    "excel": "excel",
    "chrome": "chrome",
    "vscode": "code",
    "powerpoint": "powerpnt",
    "youtube music": r"C:\Users\Rohan S Mistry\AppData\Local\Programs\youtube-music-desktop-app\YouTube Music Desktop App.exe",
    "vs code": r"C:\Users\Rohan S Mistry\AppData\Local\Programs\Microsoft VS Code\Code.exe"
}
 39 | 
# Website mappings: spoken site name -> host name.
# modules/apps.py opens the value as "https://<host>", so values must not
# include a scheme.
WEBSITE_MAPPINGS = {
    "google": "google.com",
    "youtube": "youtube.com",
    "facebook": "facebook.com",
    "twitter": "twitter.com",
    "wikipedia": "wikipedia.com",
    "instagram": "instagram.com",
    "baidu": "baidu.com",
    "yahoo": "yahoo.com",
    "yandex": "yandex.ru",
    "whatsapp": "whatsapp.com",
    "amazon": "amazon.com",
    "zoom": "zoom.us",
    "live": "live.com",
    "netflix": "netflix.com",
    "yahoo japan": "yahoo.co.jp",
    "vk": "vk.com",
    "reddit": "reddit.com",
    "office": "office.com",
    "naver": "naver.com",
    "pinterest": "pinterest.com",
    "discord": "discord.com",
    "linkedin": "linkedin.com",
    "cnn": "cnn.com",
    "microsoft": "microsoft.com",
    "mail": "mail.ru",
    "globo": "globo.com",
    "bing": "bing.com",
    "twitch": "twitch.tv",
    "google brazil": "google.com.br",
    "qq": "qq.com",
    "microsoft online": "microsoftonline.com",
    "ebay": "ebay.com",
    "msn": "msn.com",
    "yahoo news japan": "news.yahoo.co.jp",
    "duckduckgo": "duckduckgo.com",
    "ok": "ok.ru",
    "walmart": "walmart.com",
    "bilibili": "bilibili.com",
    "tiktok": "tiktok.com",
    "paypal": "paypal.com",
    "google germany": "google.de",
    "amazon japan": "amazon.co.jp",
    "aliexpress": "aliexpress.com",
    "amazon germany": "amazon.de",
    "rakuten japan": "rakuten.co.jp",
    "amazon uk": "amazon.co.uk"
}
 89 | 
 90 | def speak(text: str) -> None:
 91 |     """Speak the provided text using the text-to-speech engine."""
 92 |     import pyttsx3
 93 |     engine = pyttsx3.init("sapi5")
 94 |     voices = engine.getProperty("voices")
 95 |     engine.setProperty("voice", voices[VOICE_ID].id)
 96 |     engine.setProperty("rate", SPEECH_RATE)
 97 |     try:
 98 |         engine.say(text)
 99 |         engine.runAndWait()
100 |     except Exception as e:
101 |         import logging
102 |         logging.error(f"Speech error: {e}")


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # OWN 1.0 - Personal Voice Assistant
  2 | 
  3 | ![OWN 1.0 Logo](https://via.placeholder.com/150?text=OWN+1.0) <!-- Replace with actual logo if available -->
  4 | 
  5 | OWN 1.0 is a modular, voice-activated personal assistant built in Python, designed to streamline daily tasks through natural language interaction. Inspired by fictional assistants like JARVIS, OWN 1.0 leverages speech recognition, NLP, and API integrations to perform tasks such as opening applications, searching the web, sending emails, setting alarms, and more. This project showcases modern software engineering practices, including modular design, error handling, and NLP integration, making it a standout portfolio piece for an AI/ML fresher.
  6 | 
  7 | ## Features
  8 | 
  9 | - **Voice Interaction**: Uses `pyttsx3` for text-to-speech and `SpeechRecognition` for voice input, enabling seamless user interaction.
 10 | - **Natural Language Processing**: Integrates `spaCy` for intent recognition, improving command understanding.
 11 | - **Web and App Control**: Opens websites (e.g., Google, YouTube, Netflix) and applications (e.g., VS Code, Notepad) with voice commands.
 12 | - **Search Capabilities**: Performs Google, YouTube, and Wikipedia searches using `pywhatkit` and `wikipedia` APIs.
 13 | - **Email and Messaging**: Sends emails via Gmail SMTP and WhatsApp messages using `pywhatkit`.
 14 | - **System Control**: Manages system volume, locks the device, shuts down, and clears the recycle bin.
 15 | - **Productivity Tools**: Sets alarms, takes notes, and remembers user inputs stored in text files.
 16 | - **News Updates**: Fetches and reads latest news from various categories using the News API.
 17 | - **Mathematical Calculations**: Solves math queries using Wolfram Alpha API.
 18 | - **Keyboard Shortcuts**: Executes browser shortcuts (e.g., copy, paste, pause) via `pyautogui`.
 19 | - **Extensibility**: Modular architecture allows easy addition of new features.
 20 | 
 21 | ## Project Structure
 22 | 
 23 | ```
 24 | voice_assistant/
 25 | ├── config.py               # Configuration and environment variable management
 26 | ├── app.py                  # Main application logic
 27 | ├── modules/                # Modular feature implementations
 28 | │   ├── chrome_shortcut.py  # Browser shortcut handling
 29 | │   ├── search.py           # Web search functionalities
 30 | │   ├── apps.py             # Application and website launching
 31 | │   ├── keyboard.py         # System keyboard controls
 32 | │   ├── news.py             # News fetching and reading
 33 | │   ├── calculator.py       # Mathematical calculations
 34 | │   ├── whatsapp.py         # WhatsApp messaging
 35 | │   ├── alarm.py            # Alarm setting and ringing
 36 | ├── requirements.txt        # Project dependencies
 37 | ├── README.md               # Project documentation
 38 | ```
 39 | 
 40 | ## Installation
 41 | 
 42 | ### Prerequisites
 43 | - Python 3.8+
 44 | - A microphone for voice input
 45 | - Windows OS (due to `pyttsx3` SAPI5 and system-specific commands)
 46 | - API keys for Wolfram Alpha and News API
 47 | - Gmail account with app password for email functionality
 48 | 
 49 | ### Steps
 50 | 1. **Clone the Repository**:
 51 |    ```bash
 52 |    git clone https://github.com/rohanmistry231/voice-assistant.git
 53 |    cd voice-assistant
 54 |    ```
 55 | 
 56 | 2. **Set Up a Virtual Environment** (recommended):
 57 |    ```bash
 58 |    python -m venv venv
 59 |    source venv/bin/activate  # On Windows: venv\Scripts\activate
 60 |    ```
 61 | 
 62 | 3. **Install Dependencies**:
 63 |    ```bash
 64 |    pip install -r requirements.txt
 65 |    ```
 66 | 
 67 | 4. **Install spaCy Model**:
 68 |    ```bash
 69 |    python -m spacy download en_core_web_sm
 70 |    ```
 71 | 
 72 | 5. **Configure Environment Variables**:
 73 |    Create a `.env` file in the project root with the following:
 74 |    ```env
 75 |    WOLFRAM_ALPHA_KEY=your_wolfram_alpha_key
 76 |    NEWS_API_KEY=your_news_api_key
 77 |    EMAIL_USER=your_email@gmail.com
 78 |    EMAIL_PASS=your_gmail_app_password
 79 |    ```
 80 |    - Obtain API keys from [Wolfram Alpha](https://developer.wolframalpha.com/) and [News API](https://newsapi.org/).
 81 |    - Generate a Gmail app password at [Google Account Settings](https://myaccount.google.com/security).
 82 | 
 83 | 6. **Ensure Music File for Alarms**:
 84 |    Place a `music.mp3` file in the project root or update `alarm.py` to point to an existing audio file.
 85 | 
 86 | 7. **Run the Assistant**:
 87 |    ```bash
 88 |    python app.py
 89 |    ```
 90 | 
 91 | ## Usage
 92 | 
 93 | 1. **Start the Assistant**: Run `app.py`. The assistant greets you based on the time of day and asks for your name.
 94 | 2. **Issue Voice Commands**: Speak commands like:
 95 |    - "Open YouTube"
 96 |    - "Search Google for Python tutorials"
 97 |    - "Send an email to mama"
 98 |    - "Set an alarm for 10 and 30"
 99 |    - "Tell me a joke"
100 |    - "What's the time?"
101 |    - "Volume up"
102 |    - "Show notes"
103 | 3. **Exit**: Say "exit" to stop the assistant.
104 | 
105 | ### Example Commands
106 | | Command | Action |
107 | |---------|--------|
108 | | "Open Google" | Opens google.com in the default browser |
109 | | "Search Wikipedia for AI" | Reads a Wikipedia summary about AI |
110 | | "Play song" | Plays an MP3 file from the music directory |
111 | | "Email to user" | Prompts for email content and recipient |
112 | | "Calculate 2 plus 2" | Solves the math query using Wolfram Alpha |
113 | | "News" | Fetches latest news in a chosen category |
114 | | "Remember that meeting at 5 PM" | Saves the reminder to a file |
115 | 
116 | ## Technical Details
117 | 
118 | ### Technologies Used
119 | - **Python Libraries**:
120 |   - `pyttsx3`: Text-to-speech for voice output
121 |   - `SpeechRecognition`: Voice input via Google Speech API
122 |   - `spaCy`: NLP for intent recognition
123 |   - `pywhatkit`: YouTube and WhatsApp automation
124 |   - `wikipedia`: Wikipedia summaries
125 |   - `requests`: API calls for news
126 |   - `wolframalpha`: Mathematical calculations
127 |   - `pyautogui`: Keyboard and mouse automation
128 |   - `pynput`: System volume control
129 |   - `python-dotenv`: Environment variable management
130 | - **APIs**:
131 |   - Wolfram Alpha API
132 |   - News API
133 | - **Other**:
134 |   - Modular architecture with separate modules for each feature
135 |   - Logging for debugging and error tracking
136 |   - Environment variables for secure configuration
137 | 
138 | ### Key Features
139 | - **Modular Design**: Each feature (e.g., search, apps, news) is encapsulated in its own module, enhancing maintainability.
140 | - **NLP Integration**: `spaCy` processes user queries to identify intents (e.g., "open", "search"), improving command accuracy.
141 | - **Error Handling**: Comprehensive try-except blocks and logging ensure robustness.
142 | - **Security**: Sensitive data (API keys, email credentials) are stored in a `.env` file.
143 | - **Performance**: Optimized speech recognition with ambient noise adjustment and timeouts.
144 | 
145 | ## Development Highlights
146 | 
147 | This project demonstrates skills critical for an AI/ML fresher:
148 | - **AI/ML**: Integration of NLP with `spaCy` for intent recognition, showcasing understanding of natural language processing.
149 | - **Software Engineering**: Modular codebase, logging, and environment variable management reflect professional development practices.
150 | - **API Integration**: Seamless use of external APIs (Wolfram Alpha, News API) for enhanced functionality.
151 | - **Automation**: System control via `pyautogui` and `pynput` demonstrates automation capabilities.
152 | - **User Experience**: Context-aware greetings and clear voice feedback enhance usability.
153 | 
154 | ## Future Enhancements
155 | 
156 | - **GUI Interface**: Add a Tkinter or PyQt interface for visual interaction.
157 | - **Advanced NLP**: Integrate a transformer model (e.g., BERT) for better intent recognition.
158 | - **Wake Word Detection**: Implement wake-word activation (e.g., "Hey OWN") using Porcupine.
159 | - **Cross-Platform Support**: Adapt for Linux/Mac by replacing Windows-specific commands.
160 | - **Cloud Integration**: Store notes and reminders in a cloud database (e.g., Firebase).
161 | - **Multi-Language Support**: Add support for non-English voice commands.
162 | 
163 | ## Contributing
164 | 
165 | Contributions are welcome! To contribute:
166 | 1. Fork the repository.
167 | 2. Create a new branch (`git checkout -b feature/your-feature`).
168 | 3. Commit your changes (`git commit -m "Add your feature"`).
169 | 4. Push to the branch (`git push origin feature/your-feature`).
170 | 5. Open a pull request.
171 | 
172 | Please ensure your code follows PEP 8 guidelines and includes appropriate tests.
173 | 
174 | ## Acknowledgments
175 | 
176 | - Inspired by fictional assistants like JARVIS from Iron Man.
177 | - Thanks to the open-source community for libraries like `pyttsx3`, `SpeechRecognition`, and `spaCy`.
178 | - Built as a portfolio project to demonstrate AI/ML and software engineering skills.
179 | 
180 | ## Contact
181 | 
182 | - **Author**: Rohan S. Mistry
183 | - **Email**: rohanmistry231@gmail.com
184 | - **LinkedIn**: [linkedin](https://www.linkedin.com/in/rohan-mistry-493987202/)
185 | 
186 | Feel free to reach out for collaboration or feedback!
187 | 
188 | ---
189 | 
190 | *Built with ❤️ by Rohan S. Mistry*


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import pyttsx3
  2 | import speech_recognition as sr
  3 | import datetime
  4 | from pathlib import Path
  5 | import os
  6 | import logging
  7 | import shutil
  8 | from modules import chrome_shortcut, search, apps, keyboard, news, calculator, whatsapp, alarm
  9 | from config import VOICE_ID, SPEECH_RATE, ALARM_FILE, NOTES_FILE, REMEMBER_FILE, EMAIL_USER, EMAIL_PASS, APP_MAPPINGS, WEBSITE_MAPPINGS
 10 | 
 11 | # Setup logging
 12 | logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | # Initialize text-to-speech engine
 16 | engine = None
 17 | voices = None
 18 | try:
 19 |     engine = pyttsx3.init("sapi5")
 20 |     voices = engine.getProperty("voices")
 21 |     engine.setProperty("voice", voices[VOICE_ID].id)
 22 |     engine.setProperty("rate", SPEECH_RATE)
 23 | except Exception as e:
 24 |     logger.error(f"Failed to initialize text-to-speech engine: {e}")
 25 | 
 26 | def speak(text: str) -> None:
 27 |     """Speak the provided text using the text-to-speech engine."""
 28 |     global engine, voices
 29 |     if engine is None or voices is None:
 30 |         logger.warning("Text-to-speech engine not initialized. Attempting to initialize...")
 31 |         try:
 32 |             engine = pyttsx3.init("sapi5")
 33 |             voices = engine.getProperty("voices")
 34 |             engine.setProperty("voice", voices[VOICE_ID].id)
 35 |             engine.setProperty("rate", SPEECH_RATE)
 36 |         except Exception as e:
 37 |             logger.error(f"Failed to initialize text-to-speech engine: {e}")
 38 |             return
 39 | 
 40 |     try:
 41 |         engine.say(text)
 42 |         engine.runAndWait()
 43 |     except TypeError as te:
 44 |         logger.error(f"TypeError in speech (likely comtypes issue): {te}")
 45 |         # Attempt to reinitialize the engine
 46 |         try:
 47 |             engine = pyttsx3.init("sapi5")
 48 |             voices = engine.getProperty("voices")
 49 |             engine.setProperty("voice", voices[VOICE_ID].id)
 50 |             engine.setProperty("rate", SPEECH_RATE)
 51 |             engine.say(text)
 52 |             engine.runAndWait()
 53 |         except Exception as e2:
 54 |             logger.error(f"Failed to reinitialize speech engine after TypeError: {e2}")
 55 |     except Exception as e:
 56 |         logger.error(f"Speech error: {e}")
 57 |         # Attempt to reinitialize the engine
 58 |         try:
 59 |             engine = pyttsx3.init("sapi5")
 60 |             voices = engine.getProperty("voices")
 61 |             engine.setProperty("voice", voices[VOICE_ID].id)
 62 |             engine.setProperty("rate", SPEECH_RATE)
 63 |             engine.say(text)
 64 |             engine.runAndWait()
 65 |         except Exception as e2:
 66 |             logger.error(f"Failed to reinitialize speech engine: {e2}")
 67 | 
 68 | def take_command() -> str:
 69 |     """Capture voice input and convert to text."""
 70 |     r = sr.Recognizer()
 71 |     with sr.Microphone() as source:
 72 |         logger.info("Listening...")
 73 |         r.adjust_for_ambient_noise(source, duration=0.1)
 74 |         r.energy_threshold = 300
 75 |         r.pause_threshold = 1
 76 |         try:
 77 |             audio = r.listen(source, timeout=5, phrase_time_limit=4)
 78 |             logger.info("Recognizing...")
 79 |             query = r.recognize_google(audio, language="en-in").lower()
 80 |             logger.info(f"User said: {query}")
 81 |             return query
 82 |         except sr.WaitTimeoutError:
 83 |             logger.warning("No speech detected.")
 84 |             return "none"
 85 |         except sr.UnknownValueError:
 86 |             logger.warning("Could not understand audio.")
 87 |             return "none"
 88 |         except Exception as e:
 89 |             logger.error(f"Recognition error: {e}")
 90 |             return "none"
 91 | 
 92 | def wish_user() -> None:
 93 |     """Greet the user based on the time of day."""
 94 |     hour = datetime.datetime.now().hour
 95 |     greetings = {
 96 |         (0, 12): "Good Morning",
 97 |         (12, 18): "Good Afternoon",
 98 |         (18, 24): "Good Evening"
 99 |     }
100 |     greeting = next(g for h, g in greetings.items() if h[0] <= hour < h[1])
101 |     speak(f"{greeting} Sir! I am your Assistant, OWN 1.0.")
102 | 
def get_username() -> str:
    """Ask the user for their name and greet them.

    Returns:
        The name as recognised by ``take_command``, or ``"User"`` when
        ``take_command`` returned its ``"none"`` sentinel.
    """
    speak("What should I call you, sir?")
    uname = take_command()
    if uname == "none":
        return "User"

    speak(f"Welcome, Mister {uname}")
    # Centre each banner line on its own: calling str.center() on the whole
    # multi-line string only pads before the first line and after the last,
    # which leaves the banner misaligned.
    width = shutil.get_terminal_size().columns
    border = "#" * 20
    for line in (border, f"Welcome Mr. {uname}", border):
        # rstrip() drops the right-hand padding so a full-width line does not
        # wrap onto an extra blank row.
        print(line.center(width).rstrip())
    return uname
112 | 
def send_email(to: str, content: str) -> bool:
    """Send an email through Gmail's SMTP server.

    Logs in with ``EMAIL_USER`` / ``EMAIL_PASS`` and sends ``content`` from
    ``EMAIL_USER`` to ``to``.

    Args:
        to: Recipient address.
        content: Message text handed to ``SMTP.sendmail`` as-is.

    Returns:
        ``True`` if the message was handed to the server, ``False`` if any
        step failed (the error is logged, never raised).
    """
    try:
        import smtplib

        # The context manager closes the connection even when starttls(),
        # login() or sendmail() raises; the timeout keeps a stalled network
        # from blocking the assistant indefinitely.
        with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
            server.ehlo()
            server.starttls()
            server.login(EMAIL_USER, EMAIL_PASS)
            server.sendmail(EMAIL_USER, to, content)
        return True
    except Exception as e:
        logger.error(f"Email error: {e}")
        return False
127 | 
def detect_intent(query: str) -> str:
    """Classify a query by looking for known keywords as substrings.

    The query is lowercased, then each intent's keywords are tried in a
    fixed priority order; the first intent with any keyword contained in
    the query wins.

    Args:
        query: The user's command text.

    Returns:
        One of ``"open"``, ``"close"``, ``"search"``, ``"play"``,
        ``"email"``, ``"joke"``, or ``"unknown"`` when nothing matches.
    """
    text = query.lower()
    # Order matters: earlier rows take priority over later ones.
    keyword_table = (
        ("open", ("open", "launch")),
        ("close", ("close", "exit")),
        ("search", ("search", "find", "google", "youtube", "wikipedia", "what is", "who is")),
        ("play", ("play", "music", "song")),
        ("email", ("email", "mail")),
        ("joke", ("joke", "funny")),
    )
    for intent, keywords in keyword_table:
        for keyword in keywords:
            if keyword in text:
                return intent
    return "unknown"
144 | 
def process_command(query: str, username: str) -> bool:
    """Dispatch one voice command to the matching handler.

    The intent from ``detect_intent`` is checked first, followed by plain
    keyword checks on the query. Handlers are tried in a fixed order and the
    first match wins.

    Args:
        query: The command text (``take_command`` returns it lowercased).
        username: Name used in the goodbye message.

    Returns:
        ``False`` only when the user asked to exit, so the caller can stop
        its loop; ``True`` for every other command, including ones that were
        not understood.
    """
    intent = detect_intent(query)

    if intent == "open" or "open" in query:
        # Apps and websites are both launched through apps.open_app.
        known_target = any(name in query for name in APP_MAPPINGS) or any(
            name in query for name in WEBSITE_MAPPINGS
        )
        if known_target:
            apps.open_app(query)
        else:
            # Always return a bool here: falling off the end would return
            # None, which the main loop treats as a request to quit.
            speak("Sorry, I don't know how to open that.")
        return True

    # detect_intent maps "exit" to the "close" intent, so a bare "exit" must
    # be handled before the close branch or the goodbye path is unreachable.
    # A query that also says "close" or names a known app is still treated
    # as a request to close that app.
    if (
        "exit" in query
        and "close" not in query
        and not any(name in query for name in APP_MAPPINGS)
    ):
        speak(f"Goodbye, {username}. Thanks for using OWN 1.0.")
        return False

    if intent == "close" or "close" in query:
        apps.close_app(query)
        return True

    if intent == "search":
        search.search_query(query)
        return True

    if intent == "play" or "play song" in query:
        # Use the current user's Music folder rather than one fixed profile.
        music_dir = Path.home() / "Music"
        first_song = next(music_dir.glob("*.mp3"), None)
        if first_song is not None:
            os.startfile(first_song)
            speak("Playing music.")
        else:
            speak("No music files found.")
        return True

    if intent == "email" or "email" in query:
        speak("What should I say?")
        content = take_command()
        if "mama" in query:
            to = "pritesh_02@yahoo.com"
        else:
            speak("To whom should I send the email?")
            to = take_command().replace(" ", "")
        # "none" is take_command's failure sentinel; never mail it out or
        # use it as an address.
        if content == "none" or to == "none":
            speak("Failed to send email.")
        elif send_email(to, content):
            speak("Email sent successfully.")
        else:
            speak("Failed to send email.")
        return True

    if intent == "joke" or "joke" in query:
        import pyjokes
        speak(pyjokes.get_joke())
        return True

    if "time" in query:
        current_time = datetime.datetime.now().strftime("%I:%M %p")
        speak(f"The time is {current_time}")
        return True

    if "note" in query:
        if "write" in query:
            speak("What should I write?")
            note = take_command()
            if note == "none":
                speak("Sorry, I didn't catch that. Nothing was saved.")
            else:
                with NOTES_FILE.open("a") as f:
                    f.write(f"{datetime.datetime.now().strftime('%I:%M %p')}: {note}\n")
                speak("Note saved.")
        elif "show" in query:
            try:
                with NOTES_FILE.open("r") as f:
                    content = f.read()
            except FileNotFoundError:
                # No note has been written yet.
                speak("You don't have any notes yet.")
            else:
                print(content)
                speak(content[:100])  # Read first 100 chars
        return True

    if "remember" in query:
        if "what do you remember" in query:
            try:
                with REMEMBER_FILE.open("r") as f:
                    remembered = f.read()
            except FileNotFoundError:
                # Nothing has been stored yet.
                speak("You haven't asked me to remember anything yet.")
            else:
                speak(f"You told me to remember: {remembered}")
        else:
            message = query.replace("remember that", "").strip()
            with REMEMBER_FILE.open("a") as f:
                f.write(f"{message}\n")
            speak(f"I'll remember: {message}")
        return True

    if "alarm" in query:
        speak("Please tell the time for the alarm (e.g., 10 and 30)")
        time_input = take_command()
        if time_input == "none":
            speak("Sorry, I didn't catch the time. No alarm was set.")
        else:
            alarm.set_alarm(time_input.replace(" and ", ":"))
        return True

    if "news" in query:
        news.get_latest_news()
        return True

    if "whatsapp" in query:
        whatsapp.send_whatsapp_message()
        return True

    if "calculate" in query:
        calculator.calculate(query)
        return True

    shortcut_phrases = (
        "select all", "copy", "cut", "paste", "pause",
        "resume", "forward", "backward", "change window",
    )
    if any(cmd in query for cmd in shortcut_phrases):
        chrome_shortcut.handle_shortcut(query)
        return True

    if "volume up" in query:
        keyboard.volume_up()
        return True

    if "volume down" in query:
        keyboard.volume_down()
        return True

    speak("Sorry, I didn't understand that command.")
    return True
247 | 
def main():
    """Greet the user, learn their name, then serve commands until exit.

    Each pass listens for one command. The ``"none"`` sentinel from
    ``take_command`` is ignored, and the loop ends as soon as
    ``process_command`` returns a falsy value.
    """
    wish_user()
    username = get_username()
    speak("How can I help you, sir?")

    keep_running = True
    while keep_running:
        query = take_command()
        if query != "none":
            keep_running = bool(process_command(query, username))
260 | 
# Start the assistant only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------