├── .github
└── workflows
│ └── pylint.yml
├── LICENSE
├── README.md
├── app.py
└── requirements.txt
/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
1 | name: Pylint
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | matrix:
10 | python-version: ["3.8", "3.9", "3.10"]
11 | steps:
12 | - uses: actions/checkout@v3
13 | - name: Set up Python ${{ matrix.python-version }}
14 | uses: actions/setup-python@v3
15 | with:
16 | python-version: ${{ matrix.python-version }}
17 | - name: Install dependencies
18 | run: |
19 | python -m pip install --upgrade pip
20 | pip install pylint
21 | - name: Analysing the code with pylint
22 | run: |
23 | pylint $(git ls-files '*.py')
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Maya Akim
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # autoGNT
2 |
AutoGNT is a fully customizable, personalized, "smart" newsletter that takes a user's input and automatically creates a newsletter based on that input.
4 |
5 | Demo is here:
6 | https://majacinka-autognt-app-cruudz.streamlit.app/
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | #Import and install necessary dependencies
2 |
3 | #serpapi, requests → Scrape google results
4 | #sklearn → filter results based on how similar they are
5 | #Newspaper3K → extract text from articles
6 | #Langchain → split text/summarize it and prompt template in order to generate the title
7 | #MailGun → send email
8 |
9 | import streamlit as st
10 | import requests
11 | import json
12 | import numpy as np
13 |
14 | from newspaper import Article, ArticleException
15 | from langchain.text_splitter import TokenTextSplitter
16 | from langchain.chains.summarize import load_summarize_chain
17 | from langchain import PromptTemplate, LLMChain, OpenAI
18 | from sklearn.feature_extraction.text import TfidfVectorizer
19 | from sklearn.metrics.pairwise import cosine_similarity
20 |
# Checks whether a fetched news article is near-identical to one already
# collected, so duplicates can be filtered out of the newsletter.
def is_unique(new_article, articles, threshold=0.6):
    """Return True when `new_article` is sufficiently different from `articles`.

    Args:
        new_article: text of the candidate article.
        articles: list of article texts already accepted.
        threshold: maximum allowed cosine similarity in [0, 1]; anything
            above this is treated as a duplicate. Defaults to 0.6, which
            worked well in practice (0.8 let too many near-dupes through).

    Returns:
        True if the candidate is unique enough to keep, False otherwise.
    """
    if not articles:  # nothing to compare against yet
        return True

    # Vectorize the candidate plus existing articles in a single pass;
    # fit_transform avoids fitting and then re-transforming the same corpus.
    vectors = TfidfVectorizer().fit_transform([new_article] + articles)

    # Cosine similarity of the candidate (row 0) against every existing article
    similarity_scores = cosine_similarity(vectors[0:1], vectors[1:])

    # Unique only if even the closest existing article stays at/below threshold
    return np.max(similarity_scores) <= threshold
39 |
# Scrapes Google search results via SerpAPI and extracts article text
def get_latest_results(query, api_key):
    """Fetch the previous day's Google results for `query` and parse articles.

    Args:
        query: search keywords supplied by the user.
        api_key: SerpAPI key used to authenticate the search request.

    Returns:
        A list of (chunked_text, url) tuples, where chunked_text is the
        article body split into ~3k-token chunks. Articles too similar to
        one already collected are skipped, as are sites that can't be scraped.
    """
    params = {
        "q": query,
        "location": "United States",
        "hl": "en",  # interface language; "hl" is the SerpAPI param ("h1" was a typo)
        "gl": "us",
        "google_domain": "google.com",
        "tbs": "qdr:d",  # restrict to results from the previous day
        "api_key": api_key,
    }

    # Timeout keeps the app from hanging forever on a stalled request
    response = requests.get("https://serpapi.com/search", params, timeout=30)
    results = json.loads(response.text)

    # List of websites to exclude because you can't scrape them
    excluded_websites = ["ft.com", "cointelegraph.com", "cell.com", "futuretools.io"]

    # .get() guards against responses with no "organic_results" key at all
    urls = [
        r["link"]
        for r in results.get("organic_results", [])
        if not any(excluded_site in r["link"] for excluded_site in excluded_websites)
    ][:40]  # limit to first 40 results

    parsed_texts = []   # (chunked_text, url) tuples to return
    article_texts = []  # raw article texts kept for similarity comparison

    # Initialize the splitter once; chunks of ~3k tokens with 200-token overlap
    text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=200)

    # iterate over each URL
    for url in urls:
        try:
            article = Article(url)
            article.download()
            article.parse()

            # Skip articles that duplicate one we've already collected
            if not is_unique(article.text, article_texts):
                continue

            # Split text into token-sized chunks the LLM can handle
            splitted_texts = text_splitter.split_text(article.text)
            if not splitted_texts:
                print(article.text)

            # Append tuple of splitted text and URL to the list
            parsed_texts.append((splitted_texts, url))
            article_texts.append(article.text)  # remember for future uniqueness checks

        except ArticleException:
            print(f"Failed to download and parse article: {url}")

    return parsed_texts
95 |
# Minimal document wrapper exposing the attributes chain.run() expects
class Document:
    """Lightweight stand-in for a langchain Document.

    Carries the chunk text as `page_content` plus the `metadata` dict the
    summarize chain reads; `title` is kept only for reference.
    """

    def __init__(self, title, text):
        self.title, self.page_content = title, text
        self.metadata = {"stop": []}
102 |
def summarize_text(to_summarize_texts, openai_api_key):
    """Summarize each article's chunks and generate a clickbait title for it.

    Args:
        to_summarize_texts: list of (chunks, url) tuples as produced by
            get_latest_results.
        openai_api_key: OpenAI key used by both the summarize and title chains.

    Returns:
        A list of (clickbait_title, summarized_text, url) tuples.
    """
    summarized_texts_titles_urls = []

    llm = OpenAI(openai_api_key=openai_api_key, temperature=0.8)
    chain_summarize = load_summarize_chain(llm, chain_type="map_reduce")

    # Define prompt that generates titles for summarized text
    prompt = PromptTemplate(
        input_variables=["text"],
        template="Write an appropriate, clickbaity news article title in less than 70 characters for this text: {text}"
    )
    # Build the title chain once: it is identical on every iteration, so
    # re-creating it inside the loop was wasted work.
    chain_prompt = LLMChain(llm=llm, prompt=prompt)

    for to_summarize_text, url in to_summarize_texts:
        # Convert each chunk string to a Document object (required by chain.run)
        to_summarize_text = [Document('Dummy Title', text) for text in to_summarize_text]
        if not to_summarize_text:  # skip URLs whose article yielded no chunks
            print(f"No text to summarize for URL: {url}")
            continue

        # Map-reduce summary over all chunks of this article
        summarized_text = chain_summarize.run(to_summarize_text)

        # Generate a unique clickbait title from the summary
        clickbait_title = chain_prompt.run(summarized_text)

        summarized_texts_titles_urls.append((clickbait_title, summarized_text, url))

    return summarized_texts_titles_urls
133 |
def send_email_mailgun(subject, body, to, from_email, mailgun_domain, mailgun_api_key, timeout=30):
    """Send a plain-text email through the Mailgun HTTP API.

    Args:
        subject: email subject line.
        body: plain-text email body.
        to: recipient address (on free Mailgun plans this must be an
            authorized recipient).
        from_email: sender address associated with the Mailgun account.
        mailgun_domain: Mailgun sending domain.
        mailgun_api_key: Mailgun private API key.
        timeout: seconds to wait for the HTTP request before giving up —
            prevents the Streamlit app from hanging on a stalled connection.

    Returns:
        The `requests.Response` object so callers can inspect the status.
    """
    response = requests.post(
        f"https://api.mailgun.net/v3/{mailgun_domain}/messages",
        auth=("api", mailgun_api_key),
        data={"from": from_email,
              "to": to,
              "subject": subject,
              "text": body},
        timeout=timeout)

    # in case of an error, surface the status in the console
    print("Status code:", response.status_code)
    print("Response data:", response.text)

    return response
148 |
def main():
    """Streamlit UI: collect API keys and a topic, then scrape, summarize,
    display, and email a newsletter.

    Returns:
        The OpenAI API key input value (returned by the original design;
        nothing in the __main__ guard uses it).
    """
    # frontend
    st.title('AutoNewsletter')
    st.markdown("## Please input your API keys")

    # text input fields for API keys (masked in the UI)
    serpapi_key = st.text_input("Insert your SerpAPI key here: ", type="password")
    openai_api_key = st.text_input("Insert your OpenAI api key: ", type="password")

    # text input field for the newsletter topic keyword
    user_query = st.text_input("Make me a newsletter about: ")

    # You'll have to create a Mailgun account; on the free plan the receiving
    # mail must be registered as an authorized recipient.
    st.markdown("## Info necessary for the MailGun to work")

    recipient_mail = st.text_input("Email To: ")
    sending_mail = st.text_input("Email from: ") # email you used to create a MailGun account
    mailgun_domain = st.text_input("Enter your mailgun Domain here: ")
    # NOTE(review): consider type="password" here too, like the keys above
    mailgun_api = st.text_input("Enter your mailgun API key here: ")

    if st.button('Submit'):
        # Persist the inputs in session state so Streamlit reruns keep them
        st.session_state.serpapi_key = serpapi_key
        st.session_state.user_query = user_query

        # Scrape and chunk the articles for the query
        st.session_state.get_splitted_text = get_latest_results(user_query, serpapi_key)
        if not st.session_state.get_splitted_text:
            st.write("No results found.")
        # NOTE(review): no early return above — summarize_text([]) yields an
        # empty list, so the loops below are no-ops when nothing was found
        st.session_state.summarized_texts = summarize_text(st.session_state.get_splitted_text, openai_api_key)

        # Render each summarized article in the app
        for title, summarized_text, url in st.session_state.summarized_texts:
            st.title(title)
            # Add the emoji before the summarized text
            st.write(f"❇️ {summarized_text}")
            st.write(f"🔗 {url}")
            # Create an empty line for a gap
            st.markdown("\n\n")

        # Build the plain-text email body from the same summaries
        email_body = ""
        for title, summarized_text, url in st.session_state.summarized_texts:
            email_body += f"❇️{title}\n\n"
            email_body += f"💬{summarized_text}\n\n"
            email_body += f"🔗{url}\n\n"

        # Send the email
        send_email_mailgun(
            subject="🤖🤯 This week news about AI", #you can change "AI" to accept the user query variable instead of hardcoded word, but I prefer it like this
            #because my keywords sometimes get weird and long
            body=email_body,
            to=recipient_mail,
            from_email=sending_mail,
            mailgun_domain=mailgun_domain,
            mailgun_api_key=mailgun_api
        )

    return openai_api_key
205 |
# Run the app when executed as a script (e.g. `streamlit run app.py`)
if __name__ == "__main__":
    main()
208 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | requests
3 | numpy
4 | newspaper3k
5 | scikit-learn
6 | langchain
7 | mailgun
8 | tiktoken
9 | openai
10 |
--------------------------------------------------------------------------------