├── .env
├── .github
    └── workflows
    │   └── codeql.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── app
    ├── Chat.py
    ├── consts.py
    ├── funcs.py
    └── pages
    │   └── Upload File.py
└── requirements.txt


/.env:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY = "YOUR_OPENAI_KEY"
2 | SERP_API_KEY = "YOUR_SERP_API_KEY"


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | name: 'CodeQL'
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: ['main']
 6 |   pull_request:
 7 |     branches: ['main']
 8 |   schedule:
 9 |     - cron: '37 16 * * 3'
10 | 
11 | jobs:
12 |   analyze:
13 |     name: Analyze
14 | 
15 |     runs-on: ${{ (matrix.language == 'python' && 'ubuntu-latest') || 'macos-latest' }}
16 |     timeout-minutes: ${{ (matrix.language == 'python' && 360) || 120 }}
17 |     permissions:
18 |       actions: read
19 |       contents: read
20 |       security-events: write
21 | 
22 |     strategy:
23 |       fail-fast: false
24 |       matrix:
25 |         language: ['python']
26 | 
27 |     steps:
28 |       - name: Checkout repository
29 |         uses: actions/checkout@v4
30 | 
31 |       # Initializes the CodeQL tools for scanning.
32 |       - name: Initialize CodeQL
33 |         uses: github/codeql-action/init@v3
34 |         with:
35 |           languages: ${{ matrix.language }}
36 | 
37 |       - name: Autobuild
38 |         uses: github/codeql-action/autobuild@v3
39 | 
40 |       - name: Perform CodeQL Analysis
41 |         uses: github/codeql-action/analyze@v3
42 |         with:
43 |           category: '/language:${{matrix.language}}'
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | env
3 | app/__pycache__
4 | .env
5 | test.py
6 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity
 10 | and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the
 26 |   overall community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 |   advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 |   address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at
 63 | programing.ninja0@gmail.com.
 64 | All complaints will be reviewed and investigated promptly and fairly.
 65 | 
 66 | All community leaders are obligated to respect the privacy and security of the
 67 | reporter of any incident.
 68 | 
 69 | ## Enforcement Guidelines
 70 | 
 71 | Community leaders will follow these Community Impact Guidelines in determining
 72 | the consequences for any action they deem in violation of this Code of Conduct:
 73 | 
 74 | ### 1. Correction
 75 | 
 76 | **Community Impact**: Use of inappropriate language or other behavior deemed
 77 | unprofessional or unwelcome in the community.
 78 | 
 79 | **Consequence**: A private, written warning from community leaders, providing
 80 | clarity around the nature of the violation and an explanation of why the
 81 | behavior was inappropriate. A public apology may be requested.
 82 | 
 83 | ### 2. Warning
 84 | 
 85 | **Community Impact**: A violation through a single incident or series
 86 | of actions.
 87 | 
 88 | **Consequence**: A warning with consequences for continued behavior. No
 89 | interaction with the people involved, including unsolicited interaction with
 90 | those enforcing the Code of Conduct, for a specified period of time. This
 91 | includes avoiding interactions in community spaces as well as external channels
 92 | like social media. Violating these terms may lead to a temporary or
 93 | permanent ban.
 94 | 
 95 | ### 3. Temporary Ban
 96 | 
 97 | **Community Impact**: A serious violation of community standards, including
 98 | sustained inappropriate behavior.
 99 | 
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 | 
106 | ### 4. Permanent Ban
107 | 
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior,  harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 | 
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 | 
115 | ## Attribution
116 | 
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | [homepage]: https://www.contributor-covenant.org
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Ayan Khan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPT 3.5 ON STEROIDS: Autonomous Agent with knowledge beyond 2021
 2 | 
 3 | Welcome to GPT 3.5 ON STEROIDS, an open-source project that enhances the capabilities of GPT by integrating it with various Python libraries and APIs for advanced text generation.
 4 | 
 5 | <p align="center">
 6 | <img src="https://github.com/programmingninjas/GPT-3.5-ON-STEROIDS/assets/67486606/85e5838a-3030-49ea-8376-6f4a97534bab" width=250>
 7 | </p>
 8 | 
 9 | ## Requirements
10 | 
11 | Make sure you have the following Python libraries installed:
12 | - `openai`
13 | - `google-serp-api`
14 | - `tiktoken`
15 | - `wikipedia`
16 | - `trafilatura`
17 | - `streamlit`
18 | - `google-search-results`
19 | - `python-dotenv`
20 | - `youtube-transcript-api`
21 | - `openpyxl`
22 | - `PyPDF2`
23 | - `python-docx`
24 | - `pandasai`
25 | 
26 | ## Installation
27 | 
28 | To install the required packages, run the following command in your terminal:
29 | 
30 | ```bash
31 | pip install -r requirements.txt
32 | ```
33 | 
34 | ## Additionally, you'll need API keys for the following services:
35 | - [SerpAPI](https://serpapi.com/)
36 | - [OpenAI](https://openai.com/)
37 | 
38 | ## Running Streamlit
39 | 
40 | To run the Streamlit application, execute the following command in your terminal:
41 | 
42 | ```bash
43 | streamlit run ./app/Chat.py
44 | ```
45 | 
46 | ## Integrated Python Functions (Tools)
47 | 
48 | GPT 3.5 ON STEROIDS incorporates various Python functions that GPT can call and use, including:
49 | 
50 | - **Web Scraping:** Utilizing `google-serp-api` and `trafilatura` for dynamic data retrieval.
51 | - **Natural Language Processing:** Using `tiktoken` for language processing tasks.
52 | - **Information Retrieval:** Accessing data from `wikipedia` for comprehensive information retrieval.
53 | - **User Interface:** Employing `streamlit` for creating a user-friendly interface.
54 | 
55 | **Note:** Whenever a new tool is added, please ensure the following:
56 | - Update the `requirements.txt` file to include the new tool/library.
57 | - Update the `README.md` file to document the newly added tool and its functionality.
58 | - Ensure that your feature does not break the application test before merging.
59 | 
60 | ## Contribution Guidelines
61 | 
62 | We welcome contributions from the community to make GPT 3.5 ON STEROIDS even better! Please follow these guidelines:
63 | 
64 | 1. **Create an Issue:** First, create an issue detailing the feature, bug fix, or improvement you plan to work on. Wait for approval and assignment before proceeding to the next step.
65 | 
66 | 2. **Assign Yourself:** After your issue is approved, get yourself assigned to it. This helps avoid duplication of efforts and ensures everyone is aware of ongoing work.
67 | 
68 | 3. **Create a Pull Request (PR):** Once assigned, proceed to create your PR. Be sure to mention the assigned issue number in the PR description so that it is linked properly.
69 | 
70 | **Note:** PRs without assigned issues will be considered spammy and may lead to disqualification.
71 | 
72 | 4. **Fork the repository and create your branch:** `git checkout -b feature/new-contribution`
73 |     
74 | 5. **Make your changes and test thoroughly.**
75 |    
76 | 6. **Commit your changes:** `git commit -m "Add a brief description of your changes"`
77 |    
78 | 7. **Push to your forked repository:** `git push origin feature/new-contribution`
79 |     
80 | 8. **Create a pull request to the main repository with proof of work attached.**
81 | 
82 | ### Code of Conduct
83 | 
84 | Please review our [Code of Conduct](CODE_OF_CONDUCT.md) to understand the community standards.
85 | 
86 | ## License
87 | 
88 | This project is licensed under the MIT License - see the [LICENSE](https://github.com/programmingninjas/GPT-3.5-ON-STEROIDS/blob/main/LICENSE) file for details.
89 | 


--------------------------------------------------------------------------------
/app/Chat.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This is where the program starts
  3 | """
  4 | import time
  5 | import json
  6 | import sys
  7 | import openai
  8 | import streamlit as st
  9 | from consts import OPENAI_API_KEY, SETUP_PROMPT, INSTRUCTION_PROMPT, now
 10 | from funcs import (
 11 |     google_tool,
 12 |     browse_website,
 13 |     write_to_file,
 14 |     append_to_file,
 15 |     read_file,
 16 |     open_file,
 17 |     search_wiki,
 18 |     type_message,
 19 |     ask_gpt,
 20 |     analyse_uploaded_file,
 21 |     youtube_transcript
 22 | )
 23 | 
 24 | # TOOLS
 25 | tools = {
 26 |     "google": google_tool,
 27 |     "browse_website": browse_website,
 28 |     "write_to_file": write_to_file,
 29 |     "append_to_file": append_to_file,
 30 |     "read_file": read_file,
 31 |     "open_file": open_file,
 32 |     "wikipedia": search_wiki,
 33 |     "youtube_transcript": youtube_transcript,
 34 |     "type_message": type_message
 35 | }
 36 | 
 37 | 
 38 | # MAIN
 39 | def main():
 40 |     """
 41 |     Starting point of the program.
 42 |     """
 43 |     # INITIAL SETUP
 44 |     st.title("GPT-3.5 on Steroids")
 45 |     
 46 | 
 47 |     if "messages" not in st.session_state:
 48 |         st.session_state.messages = []
 49 | 
 50 |     for message in st.session_state.messages:
 51 |         with st.chat_message(message["role"]):
 52 |             st.markdown(message["content"])
 53 | 
 54 |     # GETTING USER PROMPT
 55 |     prompt = st.chat_input("Enter Task")
 56 |     if not prompt:
 57 |         sys.exit()
 58 |     st.session_state.messages.append({"role": "user", "content": prompt})
 59 |     with st.chat_message("user"):
 60 |         st.markdown(prompt)
 61 | 
 62 |     init_messages = [
 63 |         {"role": "system", "content": SETUP_PROMPT},
 64 |         {"role": "user", "content": prompt},
 65 |     ]
 66 |     # FIRST REPLY
 67 |     reply = ask_gpt(init_messages)
 68 | 
 69 |     prompt1 = f"{reply}\n{INSTRUCTION_PROMPT}\nThe current time and date is {now}"
 70 |     init_messages += [
 71 |         {
 72 |             "role": "system",
 73 |             "content": prompt1,
 74 |         },
 75 |         {
 76 |             "role": "user",
 77 |             "content": "Determine which next command to use, and respond using the \
 78 |                 format specified above:",
 79 |         },
 80 |     ]
 81 | 
 82 |     # SECOND REPLY
 83 |     init_reply = json.loads(ask_gpt(init_messages), strict=False)
 84 | 
 85 |     # DISPLAYING THE OUTPUT TO THE USER
 86 |     type_message({"text": init_reply["thoughts"]["text"]})
 87 | 
 88 |     def execute(reply) -> str:
 89 |         """This is a recursive function which lets GPT run tools provided to it when it needs them.
 90 |         Args:
 91 |             reply: a dictionary which contains information like thoughts and which tool to use
 92 |         Returns:
 93 |             str: returns "task_completed" after running completely
 94 |         """
 95 |         if reply["command"]["name"] == "task_complete":
 96 |             print("GPT Has done its work.")
 97 |             return "task_completed"
 98 |         try:
 99 |             time.sleep(5)
100 |             if reply["command"]["name"] == "analyse_uploaded_file":
101 |                 try:
102 |                     result = analyse_uploaded_file(st.session_state.uploaded_file,reply["command"]["args"])
103 |                 except:
104 |                     result = "This command returned nothing"
105 |             else:
106 |                 result = tools[reply["command"]["name"]](reply["command"]["args"])
107 |             messages = [
108 |                 {
109 |                     "role": "system",
110 |                     "content": prompt1
111 |                     + "\n"
112 |                     + "This reminds you of these events from your past:\n\
113 |                         I was created and nothing new has happened.",
114 |                 },
115 |                 {
116 |                     "role": "user",
117 |                     "content": "Determine which next command to use, \
118 |                         and respond using the format specified above:",
119 |                 },
120 |                 {"role": "assistant", "content": json.dumps(reply)},
121 |                 {
122 |                     "role": "system",
123 |                     "content": f"Command {reply['command']['name']} returned: "
124 |                     + result,
125 |                 },
126 |                 {
127 |                     "role": "user",
128 |                     "content": "Determine which next command to use, \
129 |                         and respond using the format specified above:",
130 |                 },
131 |             ]
132 |             reply = json.loads(ask_gpt(messages), strict=False)
133 |             type_message({"text": reply["thoughts"]["text"]})
134 |             execute(reply)
135 | 
136 |         except Exception as error:
137 |             type_message({"text": f"Task aborted due to error: {error}"})
138 |             return "task_completed"
139 | 
140 |     execute(init_reply)
141 | 
142 | 
143 | if __name__ == "__main__":
144 |     main()
145 | 


--------------------------------------------------------------------------------
/app/consts.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module includes variables like api keys and prompts
 3 | """
 4 | import os
 5 | from datetime import datetime
 6 | import tiktoken
 7 | from dotenv import load_dotenv
 8 | 
 9 | # LOADING DOTENV
load_dotenv()

# API KEYS
# Each lookup yields None when the variable is not present in the environment.
SERP_API_KEY = os.environ.get("SERP_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
15 | 
16 | # PROMPTS
17 | SETUP_PROMPT = """
18 |     Your task is to devise up to 5 highly effective goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring that the goals are optimally aligned with the successful completion of its assigned task.
19 | 
20 |     The user will provide the task, you will provide only the output in the exact format specified below with no explanation or conversation.
21 | 
22 |     Example input:
23 |     Help me with marketing my business
24 | 
25 |     Example output:
26 |     Name: CMOGPT
27 |     Description: a professional digital marketer AI that assists Solopreneurs in growing their businesses by providing world-class expertise in solving marketing problems for SaaS, content products, agencies, and more.
28 |     Goals:
29 |     - Engage in effective problem-solving, prioritization, planning, and supporting execution to address your marketing needs as your virtual Chief Marketing Officer.
30 | 
31 |     - Provide specific, actionable, and concise advice to help you make informed decisions without the use of platitudes or overly wordy explanations.
32 | 
33 |     - Identify and prioritize quick wins and cost-effective campaigns that maximize results with minimal time and budget investment.
34 | 
35 |     - Proactively take the lead in guiding you and offering suggestions when faced with unclear information or uncertainty to ensure your marketing strategy remains on track.
36 | """
# Instructions appended to the agent description on every request: the
# constraints, the list of available commands and the JSON reply format.
# The "\\n" sequences in the example below are deliberately double-escaped so
# the model sees a literal backslash-n inside the JSON string; a plain "\n"
# would be turned into a real line break by Python and make the example
# itself invalid JSON.
INSTRUCTION_PROMPT = """
    Constraints:
    1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
    2. No user assistance/input.
    3. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
    4. Exclusively use the commands listed in double quotes e.g. "command name"

    Commands:
    1. google: Google Search, args: "query": "<query>"
    2. wikipedia: Wikipedia Search, args: "query": "<query>"
    3. browse_website: Browse website, args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"
    4. youtube_transcript: Returns transcript of the YouTube video, args: "video_id": "<youtube-video-id-from-url>"
    5. write_to_file: Write to file for long term memory, args: "filename": "<filename>", "text": "<text>"
    6. open_file: Provide file to user for download, args: "path": "<path>"
    7. analyse_uploaded_file: Provide uploaded file to you for analysis,calculations and plottings, args: "query":"<detailed_query_to_perform_on_data>"
    8. append_to_file: Append to file, args: "filename": "<filename>", "text": "<text>"
    9. read_file: Read a file only after creation, args: "filename": "<filename>"
    10. task_complete: Task Complete (Shutdown), args: "reason": "<reason>"

    Resources:
    1. Internet access for searches, information gathering and youtube transcripts.
    2. Long Term memory management.
    3. GPT-3.5 powered Agents for delegation of simple tasks.
    4. File output.
    5. Commands

    Performance Evaluation:
    1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.
    2. Constructively self-criticize your big-picture behavior constantly.
    3. Reflect on past decisions and strategies to refine your approach.
    4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.
    5. Write all code to a file.

    You should only respond in JSON format as described below
    Response Format:
    {
        "thoughts": {
            "text": "thought",
            "reasoning": "reasoning",
            "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",
            "criticism": "constructive self-criticism",
            "speak": "thoughts summary to say to user"
        },
        "command": {
            "name": "command name",
            "args": {
                "arg name": "value"
            }
        }
    }
    Ensure the response can be parsed by Python json.loads
"""
89 | 
90 | # OTHER
# Size cap the tool functions apply to their output before returning it.
TOKEN_LIMIT = 2500

# Tokenizer for the "gpt-3.5-turbo" model, used to measure text length in tokens.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Timestamp taken once, at the moment this module is imported.
now = datetime.now()
94 | 


--------------------------------------------------------------------------------
/app/funcs.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This module includes all functions used by the program.
  3 | """
  4 | import time
  5 | import json
  6 | import wikipedia
  7 | from openai import OpenAI
  8 | 
  9 | client = OpenAI()
 10 | import pytesseract
 11 | import cv2
 12 | import imutils
 13 | from PIL import Image
 14 | from PyPDF2 import PdfReader
 15 | from docx import Document
 16 | import streamlit as st
 17 | import numpy as np
 18 | import pandas as pd
 19 | from pandasai import SmartDataframe
 20 | from pandasai.llm.openai import OpenAI
 21 | from serpapi import GoogleSearch
 22 | from youtube_transcript_api import YouTubeTranscriptApi
 23 | from trafilatura import fetch_url, extract
 24 | from consts import (
 25 |     SERP_API_KEY,
 26 |     OPENAI_API_KEY,
 27 |     TOKEN_LIMIT,
 28 |     encoding,
 29 | )
 30 | 
 31 | 
 32 | def search_wiki(command) -> str:
 33 |     """Searches wikipedia
 34 |     Args:
 35 |         command: a dictionary containing the query
 36 |     Returns:
 37 |         str: results returned by wikipedia
 38 |     """
 39 |     print("Search wiki called")
 40 |     try:
 41 |         return "Command wikipedia returned: " + wikipedia.summary(command["query"])
 42 |     except Exception as error:
 43 |         return f"Command wikipedia returned: {error}"
 44 | 
 45 | 
 46 | 
 47 | def write_to_file(command) -> str:
 48 |     """Writes text to a file
 49 |     Args:
 50 |         command: a dictionary containing the "filename" and "text"
 51 |     Returns:
 52 |         str: success message
 53 |     """
 54 |     print("Write to file called")
 55 |     with open(command["filename"], "w", encoding="utf-8") as file:
 56 |         file.write(command["text"])
 57 |     return "Command write_to_file returned: File was written successfully"
 58 | 
 59 | 
 60 | def append_to_file(command) -> str:
 61 |     """Appends text to a file
 62 |     Args:
 63 |         command: a dictionary containing the "filename" and "text"
 64 |     Returns:
 65 |         str: success message
 66 |     """
 67 |     print("Append to file called")
 68 |     with open(command["filename"], "a", encoding="utf-8") as file:
 69 |         file.write(command["text"])
 70 |     return "Command append_to_file returned: File was appended successfully"
 71 | 
 72 | 
 73 | def read_file(command) -> str:
 74 |     """Returns text from a file
 75 |     Args:
 76 |         command: a dictionary containing the "filename"
 77 |     Returns:
 78 |         str: text stored in the file
 79 |     """
 80 |     print("Read file called")
 81 |     try:
 82 |         with open(command["filename"], "r", encoding="utf-8") as file:
 83 |             data = file.read()
 84 |             return f"Command read_file returned: {data}"
 85 |     except Exception as error:
 86 |         return f"Command read_file returned: {error}. First create this file."
 87 | 
 88 | 
 89 | def open_file(command) -> str:
 90 |     """Shows a download button on the Streamlit interface to download the file generated by GPT.
 91 |     Args:
 92 |         command: a dictionary containing the "path" to the file
 93 |     Returns:
 94 |         str: a success message
 95 |     """
 96 |     print("Open file called")
 97 |     try:
 98 |         with open(command["path"], "r", encoding="utf-8") as file:
 99 |             st.download_button("Open File", file, file_name=command["path"])
100 |         return "Command open_file returned: File was opened successfully"
101 |     except Exception as error:
102 |         return f"Command open_file returned: {error}"
103 | 
104 | 
def browse_website(command) -> str:
    """Browse website and extract main content upto TOKEN_LIMIT tokens
    Args:
        command: a dictionary containing "url" to the website. A "question"
            entry may be present (the prompt asks for one) but is not used.
    Returns:
        str: the content of that website in json format, or an error message
        when the page could not be downloaded or processed
    """
    print("Browse website called")
    prefix = "Command browse_website returned: "
    try:
        # grab a HTML file to extract data from
        downloaded = fetch_url(command["url"])
        if downloaded is None:
            return prefix + f"Could not download {command['url']}"

        # output main content and comments as json text
        result = extract(downloaded, output_format="json")
        if result is None:
            return prefix + "No readable content could be extracted from the page"

        text = str(result)
        # Truncate by tokens (not characters) so the limit means what its name
        # says; disallowed_special=() makes special-token look-alikes in the
        # page text count as ordinary text instead of raising.
        tokens = encoding.encode(text, disallowed_special=())
        if len(tokens) > TOKEN_LIMIT:
            text = encoding.decode(tokens[:TOKEN_LIMIT])
        return prefix + text
    except Exception as error:
        return f"{prefix}{error}"
125 |     
126 | 
127 | 
def google_tool(command) -> str:
    """Searches google for query and returns upto TOKEN_LIMIT tokens of results
    Args:
        command: a dictionary containing "query"
    Returns:
        str: the organic results in json format, or an error message when the
        search could not be performed
    """
    print("Google tool called")
    prefix = "Command google returned: "
    try:
        params = {
            "q": str(command["query"]),
            "location": "Delhi,India",
            # NOTE(review): "first"/"count" look like left-overs from the old
            # pagination loop; kept so the request itself is unchanged.
            "first": 1,
            "count": 10,
            "num": 4,
            "api_key": SERP_API_KEY,
        }

        # One request only: the previous loop fetched a second page whose
        # result was thrown away, spending an extra API call per search.
        results = GoogleSearch(params).get_dict()
        if "error" in results:
            # Surface the API's own message instead of an empty result list.
            return f"{prefix}{results['error']}"

        organic_results = results.get("organic_results", [])
        response = json.dumps(organic_results, indent=2, ensure_ascii=False)

        # Truncate by tokens (not characters) so the limit means what its name says.
        tokens = encoding.encode(response, disallowed_special=())
        if len(tokens) > TOKEN_LIMIT:
            response = encoding.decode(tokens[:TOKEN_LIMIT])
        return prefix + response
    except Exception as error:
        return f"{prefix}{error}"
166 | 
def type_message(command) -> None:
    """Displays text on the screen with a typewriter effect and stores it in the chat history
    Args:
        command: a dictionary whose "text" entry is the message to display
    Returns:
        None
    """
    print("Type message called")
    # str() so a non-string value is still shown instead of failing mid-message.
    text = str(command["text"])
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for character in text:
            full_response += character
            time.sleep(0.02)
            # The trailing block character acts as the typing cursor.
            message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)

    # Record the message so it is rendered again on the next script rerun;
    # without this only the user's messages survive in the history.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    st.session_state.messages.append({"role": "assistant", "content": full_response})
183 | 
184 | 
def ask_gpt(messages, model="gpt-3.5-turbo", temperature=0) -> str:
    """Generates text with an OpenAI chat model ("gpt-3.5-turbo" by default)
    Args:
        messages: a list of dictionaries in the format {"role": <role>, "content": <message>}
        model: name of the chat model to use
        temperature: sampling temperature passed to the API
    Returns:
        str: text generated by gpt (an empty string if the reply has no text content)
    """
    reply = client.chat.completions.create(
        model=model, messages=messages, temperature=temperature
    )
    # content can be None; keep the declared str return type.
    return reply.choices[0].message.content or ""
194 | 
195 | 
def youtube_transcript(command) -> str:
    """Fetches transcripts from YouTube videos
    Args:
        command: a dictionary whose "video_id" entry is the id of the YouTube video
    Returns:
        str: transcript of the video (at most TOKEN_LIMIT tokens), or an error message
    """
    print("Get youtube transcript called")
    try:
        video_id = command["video_id"]
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            segments = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer releases of youtube-transcript-api dropped the static
            # helper in favour of an instance-based fetch().
            segments = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        srt_text = " ".join(segment["text"] for segment in segments)

        # Truncate the transcript itself, by tokens, so the closing quote of
        # the returned message is never cut off.
        tokens = encoding.encode(srt_text, disallowed_special=())
        if len(tokens) > TOKEN_LIMIT:
            srt_text = encoding.decode(tokens[:TOKEN_LIMIT])
        return f"Command youtube_transcript returned: \"{srt_text}\""
    except Exception as error:
        return f"Command youtube_transcript returned: {error}"
212 | 
213 | 
def analyse_uploaded_file(uploaded_file,command)->str:
    """Extracts or analyses the content of an uploaded pdf, docx, excel, csv or image file
    Args:
        uploaded_file: File uploaded via streamlit file_uploader (may be None
            when nothing has been uploaded)
        command: Contains the "query" to perform on the file (only used for
            excel and csv files)
    Returns:
        str: Data analysed from the file (at most TOKEN_LIMIT tokens), or an
        error message
    """
    prefix = "Command analyse_uploaded_file returned: "
    if uploaded_file is None:
        return prefix + "No file has been uploaded"

    docx_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    xlsx_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    try:
        extension = uploaded_file.type
        if extension == "application/pdf":
            # extract_text() may yield nothing for a page; treat that as empty text.
            text = "".join(
                page.extract_text() or "" for page in PdfReader(uploaded_file).pages
            )
        elif extension == docx_type:
            # Keep paragraph boundaries so words of adjacent paragraphs do not merge.
            text = "\n".join(para.text for para in Document(uploaded_file).paragraphs)
        elif extension in (xlsx_type, "text/csv"):
            llm = OpenAI(api_token=OPENAI_API_KEY)
            # CSV files need the CSV reader; read_excel cannot parse them.
            if extension == "text/csv":
                frame = pd.read_csv(uploaded_file)
            else:
                frame = pd.read_excel(uploaded_file)
            df = SmartDataframe(frame, config={"llm": llm})
            print(command["query"])
            text = df.chat(command["query"])
            print(text)
        elif extension in ["image/png", "image/jpg", "image/jpeg"]:
            img = Image.open(uploaded_file).convert("RGB")
            nimg = np.array(img)
            # PIL delivers RGB, so convert straight to grayscale.
            gray = cv2.cvtColor(nimg, cv2.COLOR_RGB2GRAY)
            kernel = np.ones((2, 2), np.uint8)
            im = cv2.dilate(gray, kernel, iterations=1)
            im = cv2.bitwise_not(im)
            # Estimate the skew from the rotated bounding box of all pixels
            # below 255, then rotate the image by the derived angle.
            coordinates = np.column_stack(np.where(im < 255))
            ang = cv2.minAreaRect(coordinates)[-1]
            print(ang)
            if 0 < ang <= 90:
                ang = 90 - ang
            height, width = im.shape[:2]
            centre = (width / 2, height / 2)
            rot_mat = cv2.getRotationMatrix2D(centre, ang, 1.0)
            im = cv2.warpAffine(im, rot_mat, (width, height), borderMode=cv2.BORDER_REFLECT)
            # Push every pixel brighter than 45 to white (vectorised form of
            # the former per-pixel loop).
            im[im > 45] = 255
            text = pytesseract.image_to_string(im)
        else:
            text = f"Unsupported file type: {extension}"

        text = str(text)
        # Truncate by tokens (not characters) so the limit means what its name says.
        tokens = encoding.encode(text, disallowed_special=())
        if len(tokens) > TOKEN_LIMIT:
            text = encoding.decode(tokens[:TOKEN_LIMIT])
        return prefix + text
    except Exception as error:
        return f"{prefix}{error}"
268 | 


--------------------------------------------------------------------------------
/app/pages/Upload File.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | st.title("File Uploader for Analysis")
 4 | 
 5 | uploaded_file = st.file_uploader("If you want to analyse a file upload it before entering the task, Else ignore",type=["pdf","docx","xlsx","png","jpg","jpeg","csv"])
 6 | 
 7 | if "uploaded_file" not in st.session_state:
 8 |     st.session_state["uploaded_file"] = None
 9 | 
10 | if uploaded_file is not None:
11 |     st.session_state.uploaded_file = uploaded_file


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | openai
 2 | google-serp-api
 3 | tiktoken
 4 | wikipedia
 5 | trafilatura
 6 | streamlit
 7 | google-search-results
 8 | python-dotenv
 9 | youtube-transcript-api
10 | openpyxl
11 | PyPDF2
12 | python-docx
13 | pandasai
14 | pillow
15 | pytesseract
16 | opencv-python
17 | numpy
18 | imutils


--------------------------------------------------------------------------------