├── .streamlit └── config.toml ├── README.md ├── main.py ├── prospects.csv ├── requirements.txt └── template.pptx /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base="dark" 3 | primaryColor="FF8C00" 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Open in Streamlit](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://arsentievalex-instant-insight-web-app-main-gz753r.streamlit.app/) 2 | 3 | # Instant Insight Web App (3rd place in Streamlit Hackathon for Snowflake Summit 2023) 4 | 5 | This app is designed to generate an instant company research. 6 | 7 | In a matter of few clicks, a user gets a PowerPoint presentation with the company overview, SWOT analysis, financials, and value propostion tailored for the selling product. The app works with the US public companies. 8 | 9 | 10 | Firstly, filter and select a company: 11 | 12 | 13 | 14 | Wait for up to 30 sec and download a PowerPoint: 15 | 16 | 17 | 18 | 19 | [App's URL](https://arsentievalex-instant-insight-web-app-main-gz753r.streamlit.app/) 20 | 21 | Tech Stack: 22 | 23 | • Database - Snowflake via Snowflake Connector 24 | 25 | • Data Processing - Pandas 26 | 27 | • Research Data - Yahoo Finance via Yahooquery, GPT 3.5 via LangChain 28 | 29 | • Visualization - Plotly 30 | 31 | • Frontend - Streamlit, AgGrid 32 | 33 | • Presentation - Python-pptx 34 | 35 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | #import snowflake.connector 3 | import streamlit as st 4 | from streamlit_dynamic_filters import DynamicFilters 5 | from st_aggrid import AgGrid 6 | from st_aggrid.grid_options_builder import GridOptionsBuilder 7 | from st_aggrid import GridUpdateMode, DataReturnMode 8 | import warnings 9 | from yahooquery import Ticker 10 | import plotly.express as px 11 | import plotly.graph_objects as go 12 | from pptx import Presentation 13 | from pptx.util import Inches 14 | from datetime import date 15 | from PIL import Image 16 | import requests 17 | import os 18 | from io import BytesIO 19 | from langchain.chat_models import ChatOpenAI 20 | from langchain.schema import HumanMessage, SystemMessage 21 | import traceback 22 | import re 23 | import ast 24 | 25 | # hide future warnings (caused by st_aggrid) 26 | warnings.simplefilter(action='ignore', category=FutureWarning) 27 | 28 | # set page layout and define basic variables 29 | st.set_page_config(layout="wide", page_icon='⚡', page_title="Instant Insight") 30 | path = os.path.dirname(__file__) 31 | today = date.today() 32 | 33 | 34 | def resize_image(url): 35 | """function to resize logos while keeping aspect ratio. Accepts URL as an argument and return an image object""" 36 | 37 | # Open the image file 38 | image = Image.open(requests.get(url, stream=True).raw) 39 | 40 | # if a logo is too high or too wide then make the background container twice as big 41 | if image.height > 140: 42 | container_width = 220 * 2 43 | container_height = 140 * 2 44 | 45 | elif image.width > 220: 46 | container_width = 220 * 2 47 | container_height = 140 * 2 48 | else: 49 | container_width = 220 50 | container_height = 140 51 | 52 | # Create a new image with the same aspect ratio as the original image 53 | new_image = Image.new('RGBA', (container_width, container_height)) 54 | 55 | # Calculate the position to paste the image so that it is centered 56 | x = (container_width - image.width) // 2 57 | y = (container_height - image.height) // 2 58 | 59 | # Paste the image onto the new image 60 | new_image.paste(image, (x, y)) 61 | return new_image 62 | 63 | 64 | def add_image(slide, image, left, top, width): 65 | """function to add an image to the PowerPoint slide and specify its position and width""" 66 | slide.shapes.add_picture(image, left=left, top=top, width=width) 67 | 68 | 69 | def replace_text(replacements, slide): 70 | """function to replace text on a PowerPoint slide. Takes dict of {match: replacement, ... } and replaces all matches""" 71 | # Iterate through all shapes in the slide 72 | for shape in slide.shapes: 73 | for match, replacement in replacements.items(): 74 | if shape.has_text_frame: 75 | if (shape.text.find(match)) != -1: 76 | text_frame = shape.text_frame 77 | for paragraph in text_frame.paragraphs: 78 | whole_text = "".join(run.text for run in paragraph.runs) 79 | whole_text = whole_text.replace(str(match), str(replacement)) 80 | for idx, run in enumerate(paragraph.runs): 81 | if idx != 0: 82 | p = paragraph._p 83 | p.remove(run._r) 84 | if bool(paragraph.runs): 85 | paragraph.runs[0].text = whole_text 86 | 87 | 88 | def get_stock(ticker, period, interval): 89 | """function to get stock data from Yahoo Finance. Takes ticker, period and interval as arguments and returns a DataFrame""" 90 | hist = ticker.history(period=period, interval=interval) 91 | hist = hist.reset_index() 92 | # capitalize column names 93 | hist.columns = [x.capitalize() for x in hist.columns] 94 | return hist 95 | 96 | 97 | def plot_graph(df, x, y, title, name): 98 | """function to plot a line graph. Takes DataFrame, x and y axis, title and name as arguments and returns a Plotly figure""" 99 | fig = px.line(df, x=x, y=y, template='simple_white', 100 | title='{} {}'.format(name, title)) 101 | fig.update_traces(line_color='#A27D4F') 102 | fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)') 103 | return fig 104 | 105 | 106 | def peers_plot(df, name, metric): 107 | """function to plot a bar chart with peers. Takes DataFrame, name, metric and ticker as arguments and returns a Plotly figure""" 108 | 109 | # drop rows with missing metrics 110 | df.dropna(subset=[metric], inplace=True) 111 | 112 | df_sorted = df.sort_values(metric, ascending=False) 113 | 114 | # iterate over the labels and add the colors to the color mapping dictionary, hightlight the selected ticker 115 | color_map = {} 116 | for label in df_sorted['Company Name']: 117 | if label == name: 118 | color_map[label] = '#A27D4F' 119 | else: 120 | color_map[label] = '#D9D9D9' 121 | 122 | fig = px.bar(df_sorted, y='Company Name', x=metric, template='simple_white', color='Company Name', 123 | color_discrete_map=color_map, 124 | orientation='h', 125 | title='{} {} vs Peers FY22'.format(name, metric)) 126 | fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', showlegend=False, yaxis_title='') 127 | return fig 128 | 129 | 130 | def esg_plot(name, df): 131 | # Define colors for types 132 | colors = {name: '#A27D4F', 'Peer Group': '#D9D9D9'} 133 | 134 | # Creating the bar chart 135 | fig = go.Figure() 136 | for type in df['Type'].unique(): 137 | fig.add_trace(go.Bar( 138 | x=df[df['Type'] == type]['variable'], 139 | y=df[df['Type'] == type]['value'], 140 | name=type, 141 | text=df[df['Type'] == type]['value'], 142 | textposition='outside', 143 | marker_color=colors[type] 144 | )) 145 | fig.update_layout( 146 | height=700, 147 | width=1000, 148 | barmode='group', 149 | title="ESG Score vs Peers Average", 150 | xaxis_title="", 151 | yaxis_title="Score", 152 | legend_title="Type", 153 | xaxis=dict(tickangle=0), 154 | paper_bgcolor='rgba(0,0,0,0)', 155 | plot_bgcolor='rgba(0,0,0,0)') 156 | return fig 157 | 158 | 159 | def get_financials(df, col_name, metric_name): 160 | """function to get financial metrics from a DataFrame. Takes DataFrame, column name and metric name as arguments and returns a DataFrame""" 161 | metric = df.loc[:, ['asOfDate', col_name]] 162 | metric_df = pd.DataFrame(metric).reset_index() 163 | metric_df.columns = ['Symbol', 'Year', metric_name] 164 | 165 | return metric_df 166 | 167 | 168 | def generate_gpt_response(gpt_input, max_tokens, api_key, llm_model): 169 | """function to generate a response from GPT-3. Takes input and max tokens as arguments and returns a response""" 170 | # Create an instance of the OpenAI class 171 | chat = ChatOpenAI(openai_api_key=api_key, model=llm_model, 172 | temperature=0, max_tokens=max_tokens) 173 | 174 | # Generate a response from the model 175 | response = chat.predict_messages( 176 | [SystemMessage(content='You are a helpful expert in finance, market and company research.' 177 | 'You also have exceptional skills in selling B2B software products.'), 178 | HumanMessage( 179 | content=gpt_input)]) 180 | 181 | return response.content.strip() 182 | 183 | 184 | def dict_from_string(response): 185 | """function to parse GPT response with competitors tickers and convert it to a dict""" 186 | # Find a substring that starts with '{' and ends with '}', across multiple lines 187 | match = re.search(r'\{.*?\}', response, re.DOTALL) 188 | 189 | dictionary = None 190 | if match: 191 | try: 192 | # Try to convert substring to dict 193 | dictionary = ast.literal_eval(match.group()) 194 | except (ValueError, SyntaxError): 195 | # Not a dictionary 196 | return None 197 | return dictionary 198 | 199 | 200 | def extract_comp_financials(tkr, comp_name, dict): 201 | """function to extract financial metrics for competitors. Takes a ticker as an argument and appends financial metrics to dict""" 202 | ticker = Ticker(tkr) 203 | income_df = ticker.income_statement(frequency='a', trailing=False) 204 | 205 | subset = income_df.loc[:, ['asOfDate', 'TotalRevenue', 'SellingGeneralAndAdministration']].reset_index() 206 | 207 | # keep only 2022 data 208 | subset = subset[subset['asOfDate'].dt.year == 2022].sort_values(by='asOfDate', ascending=False).head(1) 209 | 210 | # get values 211 | total_revenue = subset['TotalRevenue'].values[0] 212 | sg_and_a = subset['SellingGeneralAndAdministration'].values[0] 213 | 214 | # calculate sg&a as a percentage of total revenue 215 | sg_and_a_pct = round(sg_and_a / total_revenue * 100, 2) 216 | 217 | # add values to dictionary 218 | dict[comp_name]['Total Revenue'] = total_revenue 219 | dict[comp_name]['SG&A % Of Revenue'] = sg_and_a_pct 220 | 221 | 222 | def convert_to_nested_dict(input_dict, nested_key): 223 | """function to convert a dictionary to a nested dictionary. Takes a dictionary and a nested key as arguments and returns a dictionary""" 224 | output_dict = {} 225 | for key, value in input_dict.items(): 226 | output_dict[key] = {nested_key: value} 227 | return output_dict 228 | 229 | 230 | def shorten_summary(text): 231 | # Split the text into sentences using a regular expression pattern 232 | sentences = re.split(r'(?