├── sampleoutput.png ├── README.md ├── LICENSE └── Stock news.py /sampleoutput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asad70/stock-news-sentiment-analysis/HEAD/sampleoutput.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stock-news-sentiment-analysis 2 | 3 | 4 | Purpose: To analyze the news headlines of a specific stock. 5 | This program uses VADER's SentimentIntensityAnalyzer to calculate the compound sentiment score of a stock's news headlines for a given day. 6 | 7 | # How to run: 8 | 9 | python3 'Stock news.py' 10 | 11 | Analyze multiple stocks at the same time by separating the tickers with commas. Ex: 'AAPL, MSFT, F, TSLA' 12 | 13 | Analyze all news or the news of a specific date. 14 | Ex: "Enter the date/press enter for today's news (Ex: Dec-27-20) or 'All' for all the available news: " 15 | Hit enter for today's news, enter a specific date, or type 'All' for all the available news (limited to what finviz lists). 16 | 17 | You can also ignore specific sources. Ex: ignore_source = ['Motley Fool', 'TheStreet.com'] 18 | 19 | Limitations: 20 | This program only analyzes headlines, and only for the dates that have news available on finviz. 21 | 22 | Example output: 23 | ![](sampleoutput.png) 24 | 25 | 26 | 27 | ## License 28 | 29 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE) file for details. 
# Project license (text of the repository's LICENSE file):
#
# MIT License
#
# Copyright (c) 2020 asad70
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Analyze the news headlines of one or more stocks.

This program uses Vader SentimentIntensityAnalyzer to calculate the compound
sentiment value of a stock's finviz news headlines for a given day and plots
the per-day, per-ticker average as a bar chart.

You can analyze multiple stocks at the same time. Ex: 'AAPL, MSFT, F, TSLA'
(separate each input by a comma).
You can also analyze all news or the news of a specific date.
You can also ignore sources. Ex: ignore_source = ['Motley Fool', 'TheStreet.com']

Limitations:
    This program only analyzes headlines, and only for the dates that have
    news available on finviz.
"""
from datetime import date, timedelta
from urllib.error import URLError
from urllib.parse import quote
from urllib.request import urlopen, Request

import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer


ignore_source = ['Motley Fool', 'TheStreet.com']  # sources to exclude


def parse_tickers(raw):
    """Split a comma-separated ticker string into a list of symbols.

    All spaces are removed, empty entries (e.g. from a trailing comma) are
    dropped, and repeated symbols are kept only once, in first-seen order.
    """
    symbols = []
    for symbol in raw.replace(" ", "").split(","):
        if symbol and symbol not in symbols:
            symbols.append(symbol)
    return symbols


def fetch_news_table(ticker):
    """Download the finviz quote page for *ticker* and return its news table.

    Returns the element with id 'news-table', or None when the page has no
    such element. Raises urllib.error.URLError (including HTTPError) when the
    request fails.
    """
    # Escape the user-supplied symbol so it cannot alter the query string.
    url = f'https://finviz.com/quote.ashx?t={quote(ticker, safe="")}'
    req = Request(url=url, headers={'user-agent': 'news'})
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(req) as response:
        html = BeautifulSoup(response, features='html.parser')
    return html.find(id='news-table')


def resolve_start(raw):
    """Return the date filter for the run.

    Blank input means today's date formatted like 'Dec-27-20'; anything else
    (a date in that format, or 'All') is returned with surrounding whitespace
    removed.
    """
    raw = raw.strip()
    if not raw:
        return date.today().strftime("%b-%d-%y")
    return raw


def select_headlines(ticker, news_table, start, ignored_sources):
    """Collect [ticker, date, time, title] rows from one finviz news table.

    start is either 'all' (case-insensitive), which keeps every headline, or
    a date string such as 'Dec-27-20', which keeps only headlines carrying
    that date. Headlines whose source is in ignored_sources are skipped.
    """
    wanted = start.lower()
    want_all = wanted == 'all'
    selected = []
    headline_date = None  # last date seen; time-only rows inherit it

    for row in news_table.find_all('tr'):
        # Rows without a link or a timestamp cell carry no headline.
        if row.a is None or row.td is None:
            continue

        stamp = row.td.text.split()
        if len(stamp) > 1:  # both date and time, ex: Dec-27-20 10:00PM
            headline_date, headline_time = stamp[0], stamp[1]
        elif stamp:  # only time is given, ex: 05:00AM
            headline_time = stamp[0]
        else:
            continue

        # A time-only row before any dated row cannot be attributed to a day.
        if headline_date is None:
            continue

        source = row.span.text.strip() if row.span is not None else ''
        if source in ignored_sources:
            continue

        # Every row is checked instead of stopping at the first mismatch:
        # stopping early meant a requested date that was not the first date
        # in the table never matched any headline.
        if want_all or headline_date.lower() == wanted:
            selected.append([ticker, headline_date, headline_time, row.a.text])

    return selected


def score_headlines(parsed):
    """Build a DataFrame of the headlines with a Vader 'compound' column."""
    df = pd.DataFrame(parsed, columns=['Ticker', 'date', 'Time', 'Title'])
    vader = SentimentIntensityAnalyzer()
    # For every title in the data set, compute the compound score.
    df['compound'] = df['Title'].apply(
        lambda title: vader.polarity_scores(title)['compound']
    )
    # Convert the date column from text to datetime.date objects.
    df['date'] = pd.to_datetime(df['date']).dt.date
    return df


def plot_mean_sentiment(df):
    """Show a bar chart of the average compound score per date and ticker."""
    # Average only the numeric 'compound' column; taking the mean of the
    # whole frame would also try to average the text columns.
    # unstack() turns the tickers into columns so dates form the x-axis.
    mean_df = df.groupby(['date', 'Ticker'])['compound'].mean().unstack()
    # Size the figure through plot(); a separate plt.figure() call would open
    # a second, empty figure because DataFrame.plot creates its own.
    mean_df.plot(kind='bar', figsize=(6, 6))
    plt.show()


def main():
    """Prompt for tickers and a date, then fetch, score and plot headlines."""
    tickers = parse_tickers(
        input("Enter a valid ticker(for multiple tickers separate by ',') ex 'AAPL, MSFT': ")
    )

    # Getting Finviz data: one news table per ticker.
    news_tables = {}
    for ticker in tickers:
        try:
            news_table = fetch_news_table(ticker)
        except URLError as err:
            print(f"Could not fetch news for {ticker!r}: {err}")
            continue
        if news_table is None:
            print(f"No news table found for {ticker!r}; skipping.")
            continue
        news_tables[ticker] = news_table

    start = resolve_start(
        input("Enter the date/press enter for today's news (Ex: Dec-27-20) or 'All' for all the available news: ")
    )

    # Parsing and manipulating.
    parsed = []
    for ticker, news_table in news_tables.items():
        parsed.extend(select_headlines(ticker, news_table, start, ignore_source))

    if not parsed:
        print("No headlines matched the given tickers and date.")
        return

    # Applying sentiment analysis and visualizing it.
    plot_mean_sentiment(score_headlines(parsed))


if __name__ == "__main__":
    main()