├── static
    └── favicon.png
├── requirements.txt
├── README.md
├── LICENSE
├── app.py
└── templates
    └── index.html


/static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LexiestLeszek/sova_ollama/HEAD/static/favicon.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4==4.11.1
 2 | Flask==3.0.2
 3 | google==3.0.0
 4 | gunicorn==21.2.0
 5 | langchain==0.1.6
 6 | langchain_community==0.0.19
 7 | ollama==0.1.6
 8 | Requests==2.31.0
 9 | translators==5.8.9
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### Sova
 2 | 
 3 | ## Description
 4 | 
 5 | Sova is a web search engine that utilizes power of Large Language Models (LLM) to return full text answer instead of bunch of links like traditional search engines do.
 6 | 
 7 | ## Installation
 8 | 
 9 | Before installing Sova, ensure you have Python installed on your system. You will also need pip, which is typically included with Python.
10 | 
11 | To install Sova, follow these steps:
12 | 
13 | 1. Clone the repository using this command in terminal: `git clone https://github.com/LexiestLeszek/sova_ollama.git`
14 | 2. Install the required dependencies using this command in terminal: `pip install -r requirements.txt`
15 | 3. Install Ollama from their website https://ollama.com/
16 | 4. Download the local LLM (in our case - qwen:1.8b) using this command in terminal: `ollama pull qwen:1.8b`
17 | 5. Run the gunicorn server using this command in terminal: `gunicorn -b 0.0.0.0:8080 app:app`
18 | 6. Use the search-bar to input your search query or or question.
19 | 
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Leszek Mielnikow
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | from flask import Flask, render_template, request
  2 | from googlesearch import search
  3 | import re
  4 | import translators as ts
  5 | import gunicorn
  6 | import requests
  7 | import ollama
  8 | from googlesearch import search
  9 | import requests
 10 | from bs4 import BeautifulSoup
 11 | from langchain_community.embeddings import HuggingFaceEmbeddings
 12 | from langchain.text_splitter import RecursiveCharacterTextSplitter
 13 | from langchain_community.vectorstores import Chroma
 14 | 
 15 | app = Flask(__name__)
 16 | 
 17 | def get_context(question):
 18 |     print("Init Search")
 19 |     search_results = search(question, num=5, stop=5, pause=2)
 20 |     top_links = list(search_results)
 21 |     exclude_list = ["google", "facebook", "twitter", "instagram", "youtube", "tiktok", "kremlin"]
 22 |     top_links = [link for link in top_links if not any(domain in link for domain in exclude_list)]
 23 |     scraped_texts = []
 24 |     for link in top_links:
 25 |         print(link)
 26 |         try:
 27 |             page = requests.get(link)
 28 |             soup = BeautifulSoup(page.content, 'html.parser')
 29 |             text = ' '.join([p.get_text() for p in soup.find_all('p')])
 30 |             print(text)
 31 |         except Exception as e:
 32 |             print(f"Failed to scrape the link: {link}\nError: {e}")
 33 |         scraped_texts.append(text)
 34 | 
 35 |     all_scraped_text = '.\n'.join(scraped_texts)
 36 |     
 37 |     print(all_scraped_text)
 38 |     
 39 |     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
 40 |     documents = text_splitter.split_text(all_scraped_text)
 41 |     embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')
 42 | 
 43 |     db = Chroma.from_texts(documents, embedding=embeddings)
 44 |     retriever = db.as_retriever(search_kwargs={"k": 3})
 45 |     
 46 |     context = retriever.get_relevant_documents(question)
 47 | 
 48 |     return context
 49 | 
 50 | # Function to generate output using the LLM model
 51 | def generate_output(question,context):
 52 |     # Prepare the prompt with the input text
 53 |     # Define a prompt template
 54 |     prompt = f"""Use the following context to answer the question at the end.
 55 |     If you don't know the answer, just say that you don't know, don't try to make up an answer.
 56 |     Answer only factual information based on the context below.
 57 |     Context: {context}.\n
 58 |     Question: {question}\n
 59 |     Helpful Answer:"""
 60 |     
 61 |     response = ollama.chat(model='qwen:1.8b', messages=[
 62 |         
 63 |         {
 64 |             'role': 'system',
 65 |             'content': 'You are a Helpful AI assistant that answers based on the information from the user prompt Context and nothing else.',
 66 |             
 67 |             'role': 'user',
 68 |             'content': f'{prompt}',
 69 |             },
 70 |         ])
 71 |     
 72 |     output = response['message']['content']
 73 |     
 74 |     # Return the generated text
 75 |     return output
 76 | 
 77 | @app.route('/', methods=['GET', 'POST'])
 78 | def index():
 79 |     query = ""
 80 |     answer = ""
 81 | 
 82 |     if request.method == 'POST':
 83 |         query = request.form['query']
 84 | 
 85 |         if query:
 86 |         
 87 |             context = get_context(query)
 88 |             
 89 |             # Generate the output using the LLM model
 90 |             answer = generate_output(query,context)
 91 |             
 92 |             print(answer)
 93 | 
 94 |     return render_template('index.html', query=query, answer=answer)
 95 | 
 96 | #if __name__ == '__main__':
 97 | #    app.run(debug=True, port="8021")
 98 | 
 99 | # to run on ngrok:    
100 | # 1. in main Mac terminal: "ngrok http 8080"
101 | # 2. run this app in IDE terminal: "gunicorn -b 0.0.0.0:8080 app:app"
102 | 
103 | # # Find the process running on port 8081
104 | # lsof -ti tcp:PORT
105 | 
106 | # Kill the process (replace PID with the actual process ID)
107 | # kill PID
108 | 


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <link rel="icon" type="image/x-icon" href="{{ url_for('static', filename='favicon.png') }}">
  5 |     <meta charset="UTF-8">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 |     <title>Sova Search</title>
  8 |     <style>
  9 | 
 10 |     body {
 11 |         font-family: Arial, sans-serif;
 12 |         margin: 0;
 13 |         padding: 0;
 14 |         display: flex;
 15 |         justify-content: center;
 16 |         align-items: center;
 17 |         height: 100vh;
 18 |         background-color: rgb(12,12,12);
 19 |     }
 20 | 
 21 |     h1 {
 22 |         margin-bottom: 5px; /* Adjust this value as needed */
 23 |     }
 24 | 
 25 |     /* Style for the main container */
 26 |     .container {
 27 |         text-align: center;
 28 |         color: white;
 29 |         max-width: 90%;
 30 |     }
 31 | 
 32 |     /* Style for the form section */
 33 |     .search-form {
 34 |         margin-bottom: 10px;
 35 |     }
 36 | 
 37 |     input[type="text"] {
 38 |         padding: 3px;
 39 |         border: 1px solid #ccc;
 40 |         border-radius: 5px;
 41 |         font-size: 20px;
 42 |         background-color: #f5f5f5;
 43 |         width: 600px; /* Adjust width as needed */
 44 |         max-width: 90%;
 45 |         height: 45px;
 46 |         box-sizing: border-box; /* Ensure padding doesn't affect width */
 47 |         margin-bottom: 5px;
 48 |     }
 49 | 
 50 |     /* Keyframes for the clicking animation */
 51 |     @keyframes clickAnimation {
 52 |         0% {
 53 |             transform: scale(1);
 54 |         }
 55 |         50% {
 56 |             transform: scale(0.9);
 57 |         }
 58 |         100% {
 59 |             transform: scale(1);
 60 |         }
 61 |     }
 62 | 
 63 |     input[type="submit"] {
 64 |         margin-top: 5px;
 65 |         padding: 10px 20px; /* Adjust padding for height and width */
 66 |         background-color: #005eaa;
 67 |         color:  #f5f5f5;
 68 |         border: none;
 69 |         border-radius: 5px;
 70 |         width: 100px;
 71 |         height: 40px;
 72 |         font-size: 18px;
 73 |         font-weight: bold;
 74 |         cursor: pointer;
 75 |         transition: background-color 0.3s ease, transform 0.2s ease; /* Added transform property for animation */
 76 |         margin-top: 10px; /* Adjust margin-top for positioning */
 77 |     }
 78 | 
 79 |     /* Add animation for the :active state of the submit button */
 80 |     input[type="submit"]:active {
 81 |         animation: clickAnimation 0.3s ease;
 82 |     }
 83 | 
 84 |     input[type="submit"]:hover {
 85 |         background-color: rgb(55, 55, 55);
 86 |     }
 87 | 
 88 |     /* Style for the answer section */
 89 |     .answer {
 90 |         text-align: left;
 91 |         margin-bottom: 20px;
 92 |     }
 93 | 
 94 |     /* Style for the sources section */
 95 |     .sources {
 96 |         list-style: none;
 97 |         padding: 0;
 98 |         text-align: left;
 99 |         max-width: 100%;
100 |     }
101 | 
102 |     .sources li {
103 |         margin-bottom: 5px;
104 |         padding-left: 0;
105 |         font-size: 12px;
106 |         text-overflow: ellipsis;
107 |         word-wrap: break-word; 
108 |     }
109 | 
110 |     .sources a {
111 |         color: #42a5f5;
112 |         text-decoration: none;
113 |     }
114 | 
115 |     .sources a:hover {
116 |         text-decoration: underline;
117 |     }
118 | 
119 |     /* Style for the result box */
120 |     .result-box {
121 |         background-color: None;
122 |         padding: 20px;
123 |         width: 600px;
124 |         max-width: 90%;
125 |         border-radius: 8px;
126 |         margin-top: 20px;
127 |     }
128 | 
129 |     /* Additional styles for the content inside the result box */
130 |     .result-box p {
131 |         color:  #f5f5f5;
132 |         font-size: 18px;
133 |         margin: 0;
134 |     }
135 | 
136 |     .result-box h3 {
137 |         color:  #f5f5f5;
138 |         margin-top: 10px;
139 |     }
140 | 
141 |     .result-box ul {
142 |         margin-top: 5px;
143 |     }
144 | 
145 |     .result-box a {
146 |         color:  #f5f5f5;
147 |         text-decoration: none;
148 |     }
149 | 
150 |     .result-box a:hover {
151 |         text-decoration: underline;
152 |     }
153 |     </style>
154 | </head>
155 | <body>
156 |     <div class="container">
157 |         <h1>Sova 🦉</h1>
158 |         <div class="search-form">
159 |             <form action="/" method="post">
160 |                 <label for="query"></label><br>
161 |                 <input type="text" id="query" name="query" placeholder=" Ask in English... "><br><br>
162 |                 <input type="submit" value="Find">
163 |             </form>
164 |         </div>
165 | 
166 |         {% if query %}
167 |         <div class="result-box">
168 |             <div class="answer">
169 |                 <h3>💡 Answer: </h3>
170 |                 <pre style="white-space: pre-wrap;">{{ answer }}</pre>
171 |             </div>
172 |         </div>
173 |         {% endif %}
174 |     </div>
175 |     <script>
176 |         document.getElementById('query').addEventListener('input', function(event) {
177 | //            const regex = /^[\u0400-\u04FF0-9\s!?*:\-+*/=,.]+$/g;
178 | //          restrincts a lot
179 |             const regex = /^[^\u003C\u003E;{}[\]]+$/g;
180 | //          everything except {}[\]]
181 | 
182 |             let input = event.target.value;
183 |             let isValid = regex.test(input);
184 |         
185 |             while (!isValid && input.length > 0) {
186 |                 input = input.slice(0, -1);
187 |                 isValid = regex.test(input);
188 |             }
189 |         
190 |             event.target.value = input;
191 |         });
192 | 
193 |         document.addEventListener('DOMContentLoaded', function() {
194 |             document.querySelector('.search-form').addEventListener('submit', function(event) {
195 |                 // Disable the submit button
196 |                 document.querySelector('input[type="submit"]').disabled = true;
197 | 
198 |                 // Enable the button after 15 seconds
199 |                 setTimeout(function() {
200 |                     document.querySelector('input[type="submit"]').disabled = false;
201 |                 }, 15000); // 15 seconds in milliseconds
202 |             });
203 |         });
204 |     </script>
205 | </body>
206 | </html>
207 | 


--------------------------------------------------------------------------------